model_id,problem_id,category,subcategory,self_eval_answer,self_eval_correct,self_eval_confidence,judge_a_answer,judge_a_confidence,judge_b_answer,judge_b_confidence,judge_c_answer,judge_c_confidence,correct_answer
Qwen2.5-7B,test/precalculus/807.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/precalculus/927.json,Precalculus,general,A,1.0,1.0,B,0.65,A,0.85,,,A
Qwen2.5-7B,test/precalculus/1303.json,Precalculus,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B,test/precalculus/990.json,Precalculus,general,B,0.0,0.95,A,0.75,A,0.85,,,A
Qwen2.5-7B,test/precalculus/1199.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.85,,,A
Qwen2.5-7B,test/precalculus/779.json,Precalculus,general,B,1.0,0.9,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/precalculus/285.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/precalculus/1105.json,Precalculus,general,A,0.0,0.8,B,0.95,B,0.95,B,0.95,A
Qwen2.5-7B,test/precalculus/675.json,Precalculus,general,B,0.0,0.9,A,0.8,B,0.95,A,1.0,A
Qwen2.5-7B,test/precalculus/1146.json,Precalculus,general,B,1.0,0.9,B,0.75,B,0.7,B,0.7,B
Qwen2.5-7B,test/precalculus/1313.json,Precalculus,general,A,1.0,0.95,A,0.9,A,0.85,A,0.9,B
Qwen2.5-7B,test/precalculus/24313.json,Precalculus,general,A,0.0,0.9,B,1.0,B,0.95,B,1.0,A
Qwen2.5-7B,test/precalculus/34.json,Precalculus,general,A,1.0,0.95,B,0.51,A,0.95,A,0.95,A
Qwen2.5-7B,test/precalculus/1300.json,Precalculus,general,A,0.0,0.9,B,0.75,B,0.85,B,0.8,B
Qwen2.5-7B,test/precalculus/44.json,Precalculus,general,A,0.0,0.8,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/precalculus/477.json,Precalculus,general,A,1.0,0.9,A,0.6,A,0.95,A,1.0,B
Qwen2.5-7B,test/precalculus/43.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/precalculus/986.json,Precalculus,general,A,0.0,0.85,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B,test/precalculus/117.json,Precalculus,general,B,0.0,0.95,A,0.85,B,0.85,A,0.7,A
Qwen2.5-7B,test/precalculus/697.json,Precalculus,general,A,0.0,0.95,B,0.7,A,0.6,B,0.9,B
Qwen2.5-7B,test/precalculus/659.json,Precalculus,general,A,0.0,0.9,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B,test/precalculus/263.json,Precalculus,general,A,0.0,0.95,B,0.75,B,1.0,B,0.95,A
Qwen2.5-7B,test/precalculus/541.json,Precalculus,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/precalculus/190.json,Precalculus,general,A,1.0,0.95,B,0.65,A,0.6,A,0.7,A
Qwen2.5-7B,test/precalculus/819.json,Precalculus,general,A,0.0,0.95,B,0.6,A,0.95,B,0.9,A
Qwen2.5-7B,test/precalculus/1056.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,B
Qwen2.5-7B,test/precalculus/441.json,Precalculus,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/precalculus/989.json,Precalculus,general,A,1.0,0.85,A,0.75,B,0.7,A,0.8,A
Qwen2.5-7B,test/precalculus/920.json,Precalculus,general,B,0.0,0.8,B,0.75,A,0.85,,,B
Qwen2.5-7B,test/precalculus/452.json,Precalculus,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/precalculus/580.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/precalculus/768.json,Precalculus,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B,test/precalculus/1172.json,Precalculus,general,A,1.0,0.85,B,0.65,A,0.7,,,B
Qwen2.5-7B,test/precalculus/1201.json,Precalculus,general,A,0.0,0.95,B,0.7,B,0.85,,,B
Qwen2.5-7B,test/precalculus/881.json,Precalculus,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/precalculus/695.json,Precalculus,general,A,1.0,0.95,B,0.6,A,0.95,A,0.95,A
Qwen2.5-7B,test/precalculus/742.json,Precalculus,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/precalculus/801.json,Precalculus,general,B,0.0,0.9,A,0.95,A,0.85,A,0.8,B
Qwen2.5-7B,test/precalculus/826.json,Precalculus,general,A,0.0,0.85,A,0.85,B,0.95,B,0.95,A
Qwen2.5-7B,test/precalculus/1281.json,Precalculus,general,A,1.0,1.0,A,0.65,A,0.75,,,A
Qwen2.5-7B,test/precalculus/96.json,Precalculus,general,A,1.0,1.0,B,0.9,A,0.95,A,1.0,A
Qwen2.5-7B,test/precalculus/1289.json,Precalculus,general,A,0.0,0.95,A,0.6,B,0.95,B,0.95,B
Qwen2.5-7B,test/precalculus/902.json,Precalculus,general,A,0.0,0.95,B,0.6,B,0.85,,,B
Qwen2.5-7B,test/precalculus/1291.json,Precalculus,general,A,0.0,0.85,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/precalculus/398.json,Precalculus,general,A,1.0,1.0,A,1.0,A,0.95,A,1.0,B
Qwen2.5-7B,test/precalculus/681.json,Precalculus,general,A,1.0,0.9,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B,test/precalculus/145.json,Precalculus,general,A,1.0,0.85,A,1.0,A,0.99,A,1.0,A
Qwen2.5-7B,test/precalculus/625.json,Precalculus,general,A,1.0,0.9,A,0.95,B,0.95,A,0.95,B
Qwen2.5-7B,test/precalculus/1202.json,Precalculus,general,B,1.0,0.85,B,0.75,B,0.75,B,0.7,B
Qwen2.5-7B,test/precalculus/1133.json,Precalculus,general,B,1.0,0.85,B,0.65,B,0.9,,,A
Qwen2.5-7B,test/precalculus/499.json,Precalculus,general,A,0.0,1.0,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B,test/precalculus/323.json,Precalculus,general,A,0.0,1.0,B,1.0,B,0.98,B,1.0,A
Qwen2.5-7B,test/precalculus/703.json,Precalculus,general,A,0.0,0.8,B,0.9,B,0.9,B,0.95,B
Qwen2.5-7B,test/precalculus/1252.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.98,A,0.9,A
Qwen2.5-7B,test/precalculus/1082.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/precalculus/356.json,Precalculus,general,A,0.0,1.0,B,0.85,A,0.95,B,1.0,A
Qwen2.5-7B,test/intermediate_algebra/1994.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,B
Qwen2.5-7B,test/intermediate_algebra/1197.json,Intermediate Algebra,general,B,1.0,0.95,B,0.65,B,0.85,B,0.7,B
Qwen2.5-7B,test/intermediate_algebra/134.json,Intermediate Algebra,general,A,0.0,0.9,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B,test/intermediate_algebra/1000.json,Intermediate Algebra,general,B,0.0,0.95,B,0.6,A,0.6,A,0.7,A
Qwen2.5-7B,test/intermediate_algebra/607.json,Intermediate Algebra,general,A,1.0,0.7,B,0.6,A,0.6,,,B
Qwen2.5-7B,test/intermediate_algebra/1388.json,Intermediate Algebra,general,A,1.0,1.0,A,1.0,A,0.98,A,1.0,B
Qwen2.5-7B,test/intermediate_algebra/428.json,Intermediate Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B,test/intermediate_algebra/1454.json,Intermediate Algebra,general,A,1.0,0.9,A,0.7,A,0.85,,,A
Qwen2.5-7B,test/intermediate_algebra/1217.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,B,0.95,A,0.95,A
Qwen2.5-7B,test/intermediate_algebra/1168.json,Intermediate Algebra,general,A,1.0,0.9,A,0.9,A,0.95,A,1.0,A
Qwen2.5-7B,test/intermediate_algebra/956.json,Intermediate Algebra,general,A,0.0,0.85,B,0.6,B,0.6,,,A
Qwen2.5-7B,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,1.0,0.9,A,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/intermediate_algebra/279.json,Intermediate Algebra,general,A,0.0,0.95,B,0.7,B,0.9,A,0.7,A
Qwen2.5-7B,test/intermediate_algebra/207.json,Intermediate Algebra,general,A,1.0,0.85,B,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/intermediate_algebra/623.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B,test/intermediate_algebra/47.json,Intermediate Algebra,general,B,1.0,0.9,B,0.75,B,0.85,,,B
Qwen2.5-7B,test/intermediate_algebra/1849.json,Intermediate Algebra,general,A,1.0,1.0,A,0.65,A,0.75,A,0.65,A
Qwen2.5-7B,test/intermediate_algebra/2046.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/662.json,Intermediate Algebra,general,B,0.0,0.9,A,0.7,B,0.7,A,0.65,B
Qwen2.5-7B,test/intermediate_algebra/582.json,Intermediate Algebra,general,B,1.0,0.85,B,0.85,B,0.9,,,B
Qwen2.5-7B,test/intermediate_algebra/431.json,Intermediate Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/558.json,Intermediate Algebra,general,B,1.0,0.9,B,0.6,B,0.6,,,A
Qwen2.5-7B,test/intermediate_algebra/362.json,Intermediate Algebra,general,B,1.0,0.9,B,0.75,B,0.65,,,A
Qwen2.5-7B,test/intermediate_algebra/515.json,Intermediate Algebra,general,A,1.0,0.9,A,0.6,A,0.95,A,0.95,A
Qwen2.5-7B,test/intermediate_algebra/894.json,Intermediate Algebra,general,A,1.0,0.9,A,1.0,A,1.0,A,0.95,A
Qwen2.5-7B,test/intermediate_algebra/345.json,Intermediate Algebra,general,B,1.0,0.85,A,0.85,B,0.7,B,0.75,B
Qwen2.5-7B,test/intermediate_algebra/1898.json,Intermediate Algebra,general,A,1.0,0.8,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/intermediate_algebra/232.json,Intermediate Algebra,general,A,0.0,0.95,A,0.85,B,0.95,,,B
Qwen2.5-7B,test/intermediate_algebra/128.json,Intermediate Algebra,general,A,1.0,0.9,A,0.85,A,0.95,A,1.0,A
Qwen2.5-7B,test/intermediate_algebra/1063.json,Intermediate Algebra,general,A,1.0,0.8,A,0.75,A,0.75,,,B
Qwen2.5-7B,test/intermediate_algebra/1126.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,A
Qwen2.5-7B,test/intermediate_algebra/2022.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,1.0,A
Qwen2.5-7B,test/intermediate_algebra/1151.json,Intermediate Algebra,general,B,1.0,0.85,B,0.98,B,0.98,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,1.0,0.75,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B,test/intermediate_algebra/966.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,A
Qwen2.5-7B,test/intermediate_algebra/964.json,Intermediate Algebra,general,B,1.0,0.85,B,0.7,A,0.6,,,A
Qwen2.5-7B,test/intermediate_algebra/1410.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,1.0,A
Qwen2.5-7B,test/intermediate_algebra/991.json,Intermediate Algebra,general,B,1.0,0.8,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/183.json,Intermediate Algebra,general,A,0.0,0.8,B,0.7,B,0.6,B,0.95,B
Qwen2.5-7B,test/intermediate_algebra/1422.json,Intermediate Algebra,general,A,1.0,0.95,A,0.65,A,0.95,A,0.95,A
Qwen2.5-7B,test/intermediate_algebra/2196.json,Intermediate Algebra,general,B,1.0,0.85,B,0.98,B,0.95,B,1.0,A
Qwen2.5-7B,test/intermediate_algebra/591.json,Intermediate Algebra,general,A,0.0,0.85,B,0.85,B,0.95,,,B
Qwen2.5-7B,test/intermediate_algebra/1555.json,Intermediate Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B,test/intermediate_algebra/1510.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,A,0.6,,,B
Qwen2.5-7B,test/intermediate_algebra/102.json,Intermediate Algebra,general,A,0.0,1.0,B,0.75,A,0.5,B,0.9,A
Qwen2.5-7B,test/intermediate_algebra/986.json,Intermediate Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,A
Qwen2.5-7B,test/intermediate_algebra/1354.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,,,A
Qwen2.5-7B,test/intermediate_algebra/1837.json,Intermediate Algebra,general,A,1.0,0.85,A,0.85,A,0.98,A,1.0,B
Qwen2.5-7B,test/intermediate_algebra/337.json,Intermediate Algebra,general,A,1.0,0.95,B,0.51,A,0.98,A,1.0,A
Qwen2.5-7B,test/intermediate_algebra/1210.json,Intermediate Algebra,general,A,1.0,0.9,A,0.85,A,0.85,,,B
Qwen2.5-7B,test/intermediate_algebra/1123.json,Intermediate Algebra,general,A,0.0,0.9,B,0.55,A,0.95,B,0.95,A
Qwen2.5-7B,test/intermediate_algebra/149.json,Intermediate Algebra,general,A,1.0,0.9,A,0.6,A,0.95,A,0.95,B
Qwen2.5-7B,test/intermediate_algebra/1411.json,Intermediate Algebra,general,B,1.0,0.85,A,0.6,B,0.85,B,0.7,A
Qwen2.5-7B,test/intermediate_algebra/960.json,Intermediate Algebra,general,B,0.0,0.85,B,0.6,A,0.5,A,0.6,B
Qwen2.5-7B,test/intermediate_algebra/1300.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/intermediate_algebra/90.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,B,0.98,B,0.9,B
Qwen2.5-7B,test/intermediate_algebra/754.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.9,A
Qwen2.5-7B,test/intermediate_algebra/446.json,Intermediate Algebra,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/intermediate_algebra/1544.json,Intermediate Algebra,general,A,0.0,0.9,B,0.75,B,0.7,A,0.7,B
Qwen2.5-7B,test/intermediate_algebra/1714.json,Intermediate Algebra,general,A,0.0,0.9,B,0.75,B,0.95,B,0.95,B
Qwen2.5-7B,test/intermediate_algebra/2152.json,Intermediate Algebra,general,A,0.0,1.0,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/117.json,Intermediate Algebra,general,A,0.0,0.9,B,0.65,B,0.7,B,0.7,B
Qwen2.5-7B,test/intermediate_algebra/190.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.6,A,0.6,A
Qwen2.5-7B,test/intermediate_algebra/776.json,Intermediate Algebra,general,A,0.0,0.85,B,0.6,B,0.6,,,A
Qwen2.5-7B,test/intermediate_algebra/1566.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
Qwen2.5-7B,test/intermediate_algebra/1572.json,Intermediate Algebra,general,A,1.0,0.9,B,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/intermediate_algebra/1166.json,Intermediate Algebra,general,B,1.0,0.8,B,0.9,B,0.95,B,0.75,B
Qwen2.5-7B,test/intermediate_algebra/860.json,Intermediate Algebra,general,A,1.0,0.8,A,1.0,A,0.95,A,1.0,B
Qwen2.5-7B,test/intermediate_algebra/1407.json,Intermediate Algebra,general,A,0.0,0.9,B,0.8,B,0.85,B,0.8,A
Qwen2.5-7B,test/intermediate_algebra/1405.json,Intermediate Algebra,general,A,1.0,0.8,A,1.0,A,0.98,A,1.0,B
Qwen2.5-7B,test/intermediate_algebra/690.json,Intermediate Algebra,general,A,0.0,0.9,B,0.75,B,0.65,B,0.8,B
Qwen2.5-7B,test/intermediate_algebra/955.json,Intermediate Algebra,general,A,1.0,0.85,A,0.65,A,0.85,A,0.7,A
Qwen2.5-7B,test/intermediate_algebra/1992.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Qwen2.5-7B,test/intermediate_algebra/1111.json,Intermediate Algebra,general,A,0.0,0.9,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/1791.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.6,A,0.95,B
Qwen2.5-7B,test/intermediate_algebra/1806.json,Intermediate Algebra,general,B,1.0,0.95,B,0.75,B,0.75,B,0.6,B
Qwen2.5-7B,test/intermediate_algebra/1797.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/2146.json,Intermediate Algebra,general,A,0.0,0.95,B,0.8,B,0.95,,,A
Qwen2.5-7B,test/intermediate_algebra/2015.json,Intermediate Algebra,general,A,1.0,1.0,B,0.6,A,0.85,,,B
Qwen2.5-7B,test/intermediate_algebra/121.json,Intermediate Algebra,general,A,0.0,0.9,B,0.65,B,0.5,A,0.8,A
Qwen2.5-7B,test/intermediate_algebra/1014.json,Intermediate Algebra,general,A,0.0,0.8,B,0.98,B,0.98,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/1462.json,Intermediate Algebra,general,A,1.0,0.8,B,0.6,A,0.85,A,0.7,A
Qwen2.5-7B,test/intermediate_algebra/199.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/1779.json,Intermediate Algebra,general,A,0.0,0.8,B,0.65,B,0.85,,,B
Qwen2.5-7B,test/intermediate_algebra/1102.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,1.0,B,1.0,A
Qwen2.5-7B,test/intermediate_algebra/834.json,Intermediate Algebra,general,A,0.0,1.0,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B,test/intermediate_algebra/158.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B,test/intermediate_algebra/752.json,Intermediate Algebra,general,A,0.0,0.95,B,0.55,A,0.95,B,0.95,B
Qwen2.5-7B,test/intermediate_algebra/1279.json,Intermediate Algebra,general,A,0.0,0.9,B,0.75,B,0.6,,,A
Qwen2.5-7B,test/intermediate_algebra/1467.json,Intermediate Algebra,general,A,1.0,1.0,A,0.7,A,0.7,A,0.65,A
Qwen2.5-7B,test/intermediate_algebra/101.json,Intermediate Algebra,general,B,0.0,0.95,A,0.75,A,0.95,A,0.95,A
Qwen2.5-7B,test/intermediate_algebra/1365.json,Intermediate Algebra,general,A,0.0,1.0,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/1350.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,A,0.85,B,0.7,A
Qwen2.5-7B,test/intermediate_algebra/1930.json,Intermediate Algebra,general,A,0.0,0.9,B,0.7,B,0.65,B,0.6,A
Qwen2.5-7B,test/intermediate_algebra/1981.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B,test/intermediate_algebra/1232.json,Intermediate Algebra,general,A,0.0,0.95,B,0.6,A,0.95,B,0.9,B
Qwen2.5-7B,test/intermediate_algebra/1508.json,Intermediate Algebra,general,B,1.0,0.9,B,0.85,B,0.85,B,0.7,A
Qwen2.5-7B,test/algebra/2584.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,A
Qwen2.5-7B,test/algebra/1349.json,Algebra,general,A,1.0,0.75,B,0.9,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/2036.json,Algebra,general,A,0.0,0.9,B,0.95,A,0.95,B,0.9,A
Qwen2.5-7B,test/algebra/1098.json,Algebra,general,A,1.0,0.9,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/1837.json,Algebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/algebra/2193.json,Algebra,general,A,0.0,0.8,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/2427.json,Algebra,general,A,1.0,0.95,A,0.8,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/1072.json,Algebra,general,B,0.0,0.9,B,0.7,A,0.75,A,0.8,A
Qwen2.5-7B,test/algebra/24.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/2214.json,Algebra,general,A,1.0,0.9,B,0.7,A,0.95,A,1.0,A
Qwen2.5-7B,test/algebra/305.json,Algebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B,test/algebra/1265.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,1.0,B
Qwen2.5-7B,test/algebra/187.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/769.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/722.json,Algebra,general,A,1.0,0.85,A,0.95,A,0.95,A,1.0,B
Qwen2.5-7B,test/algebra/2046.json,Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B,test/algebra/2253.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/1004.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,1.0,B
Qwen2.5-7B,test/algebra/1035.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/2700.json,Algebra,general,A,0.0,0.85,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B,test/algebra/893.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/567.json,Algebra,general,A,1.0,0.95,A,0.6,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/892.json,Algebra,general,A,0.0,0.85,B,0.6,B,0.5,A,0.7,B
Qwen2.5-7B,test/algebra/2023.json,Algebra,general,A,0.0,0.95,B,0.6,B,0.95,B,0.4,B
Qwen2.5-7B,test/algebra/873.json,Algebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/2058.json,Algebra,general,A,0.0,0.9,B,0.55,A,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/2593.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/2157.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/2251.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.9,A
Qwen2.5-7B,test/algebra/1332.json,Algebra,general,A,0.0,0.85,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/972.json,Algebra,general,A,0.0,0.85,B,0.85,B,0.99,B,0.95,B
Qwen2.5-7B,test/algebra/2232.json,Algebra,general,A,1.0,0.8,A,0.95,A,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/661.json,Algebra,general,A,0.0,0.95,B,0.99,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/246.json,Algebra,general,A,0.0,0.8,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/algebra/1519.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/988.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/2570.json,Algebra,general,A,0.0,0.95,B,0.55,B,1.0,B,0.7,A
Qwen2.5-7B,test/algebra/621.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
Qwen2.5-7B,test/algebra/1255.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/2517.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,A
Qwen2.5-7B,test/algebra/478.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,1.0,B
Qwen2.5-7B,test/algebra/297.json,Algebra,general,A,1.0,0.85,B,0.65,A,0.8,,,A
Qwen2.5-7B,test/algebra/841.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.98,B,1.0,A
Qwen2.5-7B,test/algebra/686.json,Algebra,general,A,0.0,0.95,B,0.65,A,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/351.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/1275.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/1082.json,Algebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/algebra/1214.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,B,0.85,A
Qwen2.5-7B,test/algebra/2199.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/733.json,Algebra,general,B,1.0,0.8,B,0.85,A,0.6,,,A
Qwen2.5-7B,test/algebra/109.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/1937.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B,test/algebra/291.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/2102.json,Algebra,general,A,1.0,1.0,A,0.6,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/907.json,Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B,test/algebra/864.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/2159.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/1578.json,Algebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/975.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/1143.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B,test/algebra/2626.json,Algebra,general,A,0.0,0.95,B,1.0,B,0.99,B,1.0,B
Qwen2.5-7B,test/algebra/1787.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/1934.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,B
Qwen2.5-7B,test/algebra/2064.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/694.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.98,B,0.95,B
Qwen2.5-7B,test/algebra/524.json,Algebra,general,A,1.0,0.9,A,0.8,A,0.99,B,0.95,B
Qwen2.5-7B,test/algebra/2551.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/346.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/1282.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.5,A,0.8,A
Qwen2.5-7B,test/algebra/1184.json,Algebra,general,A,1.0,1.0,B,0.95,A,0.95,,,B
Qwen2.5-7B,test/algebra/634.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/2486.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/2257.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,A
Qwen2.5-7B,test/algebra/1842.json,Algebra,general,A,1.0,0.9,A,0.8,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/791.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/276.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/2735.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/425.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/1936.json,Algebra,general,B,1.0,0.95,B,0.8,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/2176.json,Algebra,general,B,0.0,0.9,B,0.85,A,0.85,,,B
Qwen2.5-7B,test/algebra/509.json,Algebra,general,A,1.0,0.95,B,0.55,A,0.95,A,1.0,A
Qwen2.5-7B,test/algebra/1457.json,Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/algebra/2592.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/858.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/1529.json,Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B,test/algebra/1338.json,Algebra,general,A,1.0,0.85,A,0.95,A,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/1547.json,Algebra,general,A,1.0,0.9,A,0.51,A,0.95,B,0.95,A
Qwen2.5-7B,test/algebra/529.json,Algebra,general,A,0.0,0.95,B,0.75,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/1078.json,Algebra,general,A,1.0,0.8,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/251.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.9,A
Qwen2.5-7B,test/algebra/1199.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/2264.json,Algebra,general,A,1.0,0.9,B,0.8,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/1303.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/101.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Qwen2.5-7B,test/algebra/170.json,Algebra,general,A,1.0,0.95,A,0.55,A,0.95,A,1.0,A
Qwen2.5-7B,test/algebra/849.json,Algebra,general,B,1.0,0.85,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/algebra/1031.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.98,B,1.0,A
Qwen2.5-7B,test/algebra/853.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.98,A,1.0,A
Qwen2.5-7B,test/algebra/2277.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.7,B
Qwen2.5-7B,test/algebra/518.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/114.json,Algebra,general,A,0.0,0.9,B,0.95,B,1.0,B,1.0,B
Qwen2.5-7B,test/algebra/1960.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B,test/algebra/2680.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/2391.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.98,B,0.95,A
Qwen2.5-7B,test/algebra/776.json,Algebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B,test/algebra/1796.json,Algebra,general,A,1.0,0.95,A,0.7,A,0.95,A,1.0,A
Qwen2.5-7B,test/algebra/1339.json,Algebra,general,A,0.0,0.9,B,0.9,B,0.95,B,1.0,B
Qwen2.5-7B,test/algebra/2743.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/2043.json,Algebra,general,A,0.0,0.95,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B,test/algebra/1553.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.9,B,1.0,B
Qwen2.5-7B,test/algebra/2080.json,Algebra,general,A,0.0,0.9,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/1343.json,Algebra,general,A,1.0,0.9,A,0.9,A,0.98,A,0.95,B
Qwen2.5-7B,test/algebra/668.json,Algebra,general,A,1.0,0.95,B,0.75,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/2430.json,Algebra,general,A,0.0,0.9,B,0.6,A,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/2789.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/algebra/1814.json,Algebra,general,A,1.0,0.95,A,0.6,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/2476.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.9,A
Qwen2.5-7B,test/algebra/2780.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/824.json,Algebra,general,B,1.0,0.9,B,0.6,B,0.95,B,0.95,B
Qwen2.5-7B,test/algebra/1425.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,1.0,B
Qwen2.5-7B,test/algebra/224.json,Algebra,general,A,1.0,0.95,A,0.8,A,0.95,B,0.9,B
Qwen2.5-7B,test/algebra/435.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/2470.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/algebra/2779.json,Algebra,general,A,1.0,1.0,A,1.0,A,1.0,,,B
Qwen2.5-7B,test/number_theory/572.json,Number Theory,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/number_theory/515.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/number_theory/1032.json,Number Theory,general,A,0.0,0.9,B,0.7,B,0.95,B,1.0,A
Qwen2.5-7B,test/number_theory/737.json,Number Theory,general,A,0.0,0.85,B,0.95,B,0.98,B,1.0,A
Qwen2.5-7B,test/number_theory/864.json,Number Theory,general,A,0.0,1.0,B,1.0,B,0.95,B,1.0,A
Qwen2.5-7B,test/number_theory/627.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
Qwen2.5-7B,test/number_theory/45.json,Number Theory,general,A,1.0,0.7,A,0.7,A,0.95,A,0.8,A
Qwen2.5-7B,test/number_theory/1055.json,Number Theory,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,A
Qwen2.5-7B,test/number_theory/46.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,B
Qwen2.5-7B,test/number_theory/516.json,Number Theory,general,A,0.0,0.95,B,0.65,B,0.85,,,A
Qwen2.5-7B,test/number_theory/357.json,Number Theory,general,A,1.0,1.0,A,1.0,A,1.0,A,0.8,A
Qwen2.5-7B,test/number_theory/914.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/number_theory/847.json,Number Theory,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B,test/number_theory/753.json,Number Theory,general,A,1.0,0.85,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/number_theory/1257.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/number_theory/156.json,Number Theory,general,A,1.0,0.85,A,1.0,A,0.95,A,0.95,A
Qwen2.5-7B,test/number_theory/612.json,Number Theory,general,B,0.0,0.9,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/number_theory/931.json,Number Theory,general,A,0.0,0.9,B,0.9,A,0.99,B,0.95,B
Qwen2.5-7B,test/number_theory/521.json,Number Theory,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/number_theory/598.json,Number Theory,general,A,1.0,1.0,A,1.0,A,0.9,A,1.0,B
Qwen2.5-7B,test/number_theory/978.json,Number Theory,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,A
Qwen2.5-7B,test/number_theory/838.json,Number Theory,general,A,0.0,0.95,B,1.0,B,1.0,B,1.0,A
Qwen2.5-7B,test/number_theory/149.json,Number Theory,general,A,1.0,0.95,B,0.6,A,0.95,A,0.95,A
Qwen2.5-7B,test/number_theory/1201.json,Number Theory,general,B,1.0,0.9,B,0.85,B,0.95,A,0.95,B
Qwen2.5-7B,test/number_theory/234.json,Number Theory,general,A,1.0,0.95,A,0.98,A,0.95,A,1.0,A
Qwen2.5-7B,test/number_theory/417.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,B
Qwen2.5-7B,test/number_theory/89.json,Number Theory,general,A,1.0,0.95,B,0.6,A,0.95,A,0.95,A
Qwen2.5-7B,test/number_theory/183.json,Number Theory,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B,test/number_theory/1065.json,Number Theory,general,B,1.0,0.9,B,0.35,B,0.95,A,0.8,A
Qwen2.5-7B,test/number_theory/466.json,Number Theory,general,A,0.0,0.95,B,0.75,B,0.95,A,0.9,B
Qwen2.5-7B,test/number_theory/634.json,Number Theory,general,A,0.0,0.9,B,0.55,B,0.95,A,0.9,B
Qwen2.5-7B,test/number_theory/533.json,Number Theory,general,A,0.0,0.95,B,0.75,B,1.0,,,B
Qwen2.5-7B,test/number_theory/691.json,Number Theory,general,B,1.0,0.9,B,0.9,B,0.95,B,0.95,A
Qwen2.5-7B,test/number_theory/1287.json,Number Theory,general,B,1.0,0.6,B,1.0,B,1.0,B,1.0,A
Qwen2.5-7B,test/number_theory/631.json,Number Theory,general,B,1.0,0.85,B,0.65,B,0.85,B,0.7,B
Qwen2.5-7B,test/number_theory/488.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/number_theory/1172.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,A
Qwen2.5-7B,test/number_theory/203.json,Number Theory,general,B,1.0,0.95,B,0.75,B,0.6,B,0.7,A
Qwen2.5-7B,test/number_theory/911.json,Number Theory,general,A,0.0,0.9,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B,test/number_theory/483.json,Number Theory,general,A,0.0,0.9,B,0.85,B,0.98,B,0.95,A
Qwen2.5-7B,test/number_theory/368.json,Number Theory,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/number_theory/686.json,Number Theory,general,A,0.0,0.95,B,0.75,A,0.95,B,0.95,B
Qwen2.5-7B,test/number_theory/820.json,Number Theory,general,B,1.0,0.95,B,0.98,B,0.95,B,1.0,B
Qwen2.5-7B,test/number_theory/109.json,Number Theory,general,B,1.0,0.85,B,1.0,B,0.95,B,1.0,A
Qwen2.5-7B,test/number_theory/427.json,Number Theory,general,A,0.0,0.9,B,0.55,A,0.95,B,0.95,B
Qwen2.5-7B,test/number_theory/1185.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Qwen2.5-7B,test/number_theory/928.json,Number Theory,general,B,0.0,1.0,A,0.6,A,0.95,A,0.95,B
Qwen2.5-7B,test/number_theory/132.json,Number Theory,general,B,1.0,0.85,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B,test/number_theory/769.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.98,B,0.95,B
Qwen2.5-7B,test/number_theory/1002.json,Number Theory,general,A,0.0,0.9,B,0.75,B,0.98,B,0.8,A
Qwen2.5-7B,test/number_theory/410.json,Number Theory,general,A,0.0,0.9,B,0.85,B,0.95,B,0.9,B
Qwen2.5-7B,test/number_theory/255.json,Number Theory,general,B,1.0,0.9,B,1.0,B,0.95,B,0.95,B
Qwen2.5-7B,test/number_theory/1000.json,Number Theory,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B,test/number_theory/13.json,Number Theory,general,A,1.0,1.0,A,0.55,A,0.98,A,0.95,B
Qwen2.5-7B,test/number_theory/459.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/number_theory/342.json,Number Theory,general,A,1.0,0.85,B,0.95,A,0.95,A,0.95,B
Qwen2.5-7B,test/number_theory/679.json,Number Theory,general,B,1.0,0.8,B,0.85,A,0.85,B,0.8,A
Qwen2.5-7B,test/number_theory/72.json,Number Theory,general,A,1.0,0.9,A,0.9,A,0.95,A,1.0,B
Qwen2.5-7B,test/number_theory/22.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,A
Qwen2.5-7B,test/number_theory/1128.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/number_theory/1090.json,Number Theory,general,A,1.0,0.95,A,0.85,A,1.0,,,A
Qwen2.5-7B,test/number_theory/239.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1622.json,Prealgebra,general,A,1.0,0.9,A,0.75,B,0.95,A,0.9,A
Qwen2.5-7B,test/prealgebra/1139.json,Prealgebra,general,B,0.0,0.95,B,0.6,A,0.75,,,B
Qwen2.5-7B,test/prealgebra/1840.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1302.json,Prealgebra,general,A,1.0,0.85,A,0.75,A,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/930.json,Prealgebra,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1558.json,Prealgebra,general,B,1.0,0.9,B,0.85,B,0.95,B,1.0,A
Qwen2.5-7B,test/prealgebra/1388.json,Prealgebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/951.json,Prealgebra,general,B,0.0,1.0,A,0.75,B,0.95,A,0.95,B
Qwen2.5-7B,test/prealgebra/572.json,Prealgebra,general,A,1.0,0.85,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/prealgebra/1247.json,Prealgebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Qwen2.5-7B,test/prealgebra/1747.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1233.json,Prealgebra,general,A,1.0,0.85,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/192.json,Prealgebra,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/307.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/1761.json,Prealgebra,general,A,0.0,0.85,B,0.75,A,0.95,B,0.7,A
Qwen2.5-7B,test/prealgebra/1646.json,Prealgebra,general,A,1.0,1.0,A,0.6,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/105.json,Prealgebra,general,A,1.0,0.95,A,0.8,A,0.95,B,1.0,A
Qwen2.5-7B,test/prealgebra/1924.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/1804.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/prealgebra/1733.json,Prealgebra,general,A,1.0,0.9,A,0.7,A,0.6,A,0.8,B
Qwen2.5-7B,test/prealgebra/505.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/prealgebra/1686.json,Prealgebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/prealgebra/1807.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1297.json,Prealgebra,general,B,1.0,0.9,B,0.75,A,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1655.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1356.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B,test/prealgebra/1003.json,Prealgebra,general,A,0.0,0.9,B,0.85,A,0.85,B,0.8,A
Qwen2.5-7B,test/prealgebra/1272.json,Prealgebra,general,A,1.0,0.9,B,0.9,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/1113.json,Prealgebra,general,A,0.0,0.75,B,0.95,B,0.98,B,1.0,A
Qwen2.5-7B,test/prealgebra/1908.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.75,A,0.7,B
Qwen2.5-7B,test/prealgebra/1922.json,Prealgebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,A
Qwen2.5-7B,test/prealgebra/1907.json,Prealgebra,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/2086.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/prealgebra/378.json,Prealgebra,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B,test/prealgebra/1555.json,Prealgebra,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/prealgebra/1436.json,Prealgebra,general,A,1.0,0.85,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B,test/prealgebra/1961.json,Prealgebra,general,A,1.0,0.9,A,0.6,A,0.9,A,0.7,B
Qwen2.5-7B,test/prealgebra/2057.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Qwen2.5-7B,test/prealgebra/153.json,Prealgebra,general,A,1.0,0.9,A,0.5,A,0.95,B,0.95,A
Qwen2.5-7B,test/prealgebra/874.json,Prealgebra,general,A,0.0,0.9,B,0.6,B,0.6,,,A
Qwen2.5-7B,test/prealgebra/1251.json,Prealgebra,general,B,1.0,0.9,B,1.0,B,0.98,B,1.0,A
Qwen2.5-7B,test/prealgebra/1458.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1995.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B,test/prealgebra/1317.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1742.json,Prealgebra,general,B,1.0,0.9,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/993.json,Prealgebra,general,A,0.0,0.9,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1834.json,Prealgebra,general,A,0.0,0.9,B,0.75,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1512.json,Prealgebra,general,A,1.0,0.95,B,0.7,A,0.85,,,B
Qwen2.5-7B,test/prealgebra/260.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.9,A
Qwen2.5-7B,test/prealgebra/1787.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B,test/prealgebra/1044.json,Prealgebra,general,A,0.0,0.9,B,0.85,A,0.95,B,1.0,B
Qwen2.5-7B,test/prealgebra/465.json,Prealgebra,general,A,0.0,0.75,B,0.65,B,0.75,B,0.7,A
Qwen2.5-7B,test/prealgebra/1423.json,Prealgebra,general,A,1.0,0.9,B,0.6,A,0.95,A,0.95,B
Qwen2.5-7B,test/prealgebra/954.json,Prealgebra,general,A,1.0,0.9,A,0.65,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/1973.json,Prealgebra,general,A,0.0,0.85,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B,test/prealgebra/1730.json,Prealgebra,general,A,1.0,0.9,A,0.98,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/1238.json,Prealgebra,general,A,1.0,0.9,B,0.85,A,0.95,A,1.0,A
Qwen2.5-7B,test/prealgebra/1353.json,Prealgebra,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/1187.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B,test/prealgebra/1743.json,Prealgebra,general,B,1.0,0.9,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1865.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1298.json,Prealgebra,general,A,1.0,0.85,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/2066.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/631.json,Prealgebra,general,A,0.0,0.95,B,1.0,B,0.95,B,0.9,A
Qwen2.5-7B,test/prealgebra/977.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/1991.json,Prealgebra,general,A,1.0,0.9,A,0.85,A,0.95,B,0.9,A
Qwen2.5-7B,test/prealgebra/1784.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Qwen2.5-7B,test/prealgebra/1572.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/65.json,Prealgebra,general,A,0.0,1.0,B,0.6,A,0.95,B,0.95,A
Qwen2.5-7B,test/prealgebra/1227.json,Prealgebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B,test/prealgebra/2019.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,B,1.0,A
Qwen2.5-7B,test/prealgebra/1640.json,Prealgebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,A
Qwen2.5-7B,test/prealgebra/2037.json,Prealgebra,general,A,0.0,0.9,B,0.65,B,0.95,B,0.95,B
Qwen2.5-7B,test/prealgebra/996.json,Prealgebra,general,A,1.0,0.95,A,0.8,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/805.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B,test/prealgebra/914.json,Prealgebra,general,A,0.0,0.95,B,0.8,B,0.85,,,A
Qwen2.5-7B,test/prealgebra/1114.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,1.0,A,0.9,A
Qwen2.5-7B,test/prealgebra/846.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/prealgebra/1930.json,Prealgebra,general,B,1.0,1.0,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B,test/prealgebra/1252.json,Prealgebra,general,A,0.0,0.9,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B,test/prealgebra/1203.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B,test/prealgebra/1128.json,Prealgebra,general,A,0.0,0.8,A,0.65,B,0.9,B,0.7,B
Qwen2.5-7B,test/geometry/248.json,Geometry,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B,test/geometry/434.json,Geometry,general,A,1.0,0.95,A,0.85,A,1.0,,,A
Qwen2.5-7B,test/geometry/967.json,Geometry,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/geometry/627.json,Geometry,general,B,1.0,0.9,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B,test/geometry/178.json,Geometry,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,A
Qwen2.5-7B,test/geometry/456.json,Geometry,general,A,1.0,0.85,A,0.9,A,0.95,A,0.95,B
Qwen2.5-7B,test/geometry/353.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.95,B,1.0,B
Qwen2.5-7B,test/geometry/183.json,Geometry,general,B,1.0,0.95,B,0.6,B,0.65,,,B
Qwen2.5-7B,test/geometry/283.json,Geometry,general,A,1.0,0.85,A,0.75,B,0.95,A,0.9,A
Qwen2.5-7B,test/geometry/1140.json,Geometry,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,A
Qwen2.5-7B,test/geometry/172.json,Geometry,general,A,1.0,0.95,A,0.65,A,0.85,,,A
Qwen2.5-7B,test/geometry/880.json,Geometry,general,A,1.0,0.9,A,0.85,A,1.0,,,B
Qwen2.5-7B,test/geometry/802.json,Geometry,general,A,0.0,0.85,B,0.55,A,0.95,B,0.95,B
Qwen2.5-7B,test/geometry/65.json,Geometry,general,A,0.0,0.9,B,0.7,B,0.6,,,B
Qwen2.5-7B,test/geometry/702.json,Geometry,general,B,1.0,0.9,B,0.75,B,0.9,,,A
Qwen2.5-7B,test/geometry/221.json,Geometry,general,A,0.0,0.95,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/geometry/547.json,Geometry,general,A,0.0,1.0,B,0.65,B,0.85,A,0.7,A
Qwen2.5-7B,test/geometry/229.json,Geometry,general,B,1.0,0.95,A,0.7,B,0.85,,,A
Qwen2.5-7B,test/geometry/254.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/geometry/473.json,Geometry,general,A,0.0,0.8,B,0.7,B,0.95,B,1.0,B
Qwen2.5-7B,test/geometry/347.json,Geometry,general,A,0.0,0.85,B,0.85,A,0.95,B,0.9,A
Qwen2.5-7B,test/geometry/483.json,Geometry,general,A,0.0,0.9,B,0.85,A,0.98,B,0.95,B
Qwen2.5-7B,test/geometry/826.json,Geometry,general,A,0.0,0.9,B,0.65,B,0.7,,,B
Qwen2.5-7B,test/geometry/226.json,Geometry,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B,test/geometry/686.json,Geometry,general,A,1.0,0.85,B,0.6,A,0.5,A,0.7,B
Qwen2.5-7B,test/geometry/1097.json,Geometry,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/geometry/965.json,Geometry,general,A,1.0,0.95,B,0.65,A,0.7,,,A
Qwen2.5-7B,test/geometry/711.json,Geometry,general,A,1.0,1.0,B,0.6,A,0.6,,,B
Qwen2.5-7B,test/geometry/1108.json,Geometry,general,A,0.0,0.8,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B,test/geometry/947.json,Geometry,general,A,0.0,0.85,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/geometry/465.json,Geometry,general,B,1.0,0.95,B,0.98,B,0.95,B,1.0,A
Qwen2.5-7B,test/geometry/73.json,Geometry,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B,test/geometry/106.json,Geometry,general,A,0.0,1.0,B,0.9,B,0.95,B,1.0,B
Qwen2.5-7B,test/geometry/846.json,Geometry,general,A,1.0,1.0,B,0.65,A,0.75,A,0.6,A
Qwen2.5-7B,test/geometry/538.json,Geometry,general,A,0.0,0.95,B,0.98,B,0.95,B,1.0,B
Qwen2.5-7B,test/geometry/795.json,Geometry,general,A,1.0,0.9,A,0.95,A,1.0,A,1.0,B
Qwen2.5-7B,test/geometry/817.json,Geometry,general,A,0.0,0.95,B,0.75,B,0.9,,,B
Qwen2.5-7B,test/geometry/843.json,Geometry,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B,test/geometry/477.json,Geometry,general,A,1.0,0.95,A,1.0,A,1.0,A,0.95,A
Qwen2.5-7B,test/geometry/561.json,Geometry,general,A,1.0,0.95,A,0.75,A,0.85,A,0.8,B
Qwen2.5-7B,test/geometry/615.json,Geometry,general,A,1.0,0.95,B,0.75,A,0.85,A,0.7,A
Qwen2.5-7B,test/counting_and_probability/525.json,Counting & Probability,general,A,0.0,0.3,A,0.65,B,0.75,,,A
Qwen2.5-7B,test/counting_and_probability/666.json,Counting & Probability,general,A,0.0,1.0,A,0.55,B,0.95,B,1.0,B
Qwen2.5-7B,test/counting_and_probability/134.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Qwen2.5-7B,test/counting_and_probability/119.json,Counting & Probability,general,A,1.0,0.9,B,0.51,A,0.95,A,0.95,A
Qwen2.5-7B,test/counting_and_probability/1114.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/counting_and_probability/377.json,Counting & Probability,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B,test/counting_and_probability/23957.json,Counting & Probability,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B,test/counting_and_probability/1060.json,Counting & Probability,general,A,0.0,0.95,B,0.6,A,0.95,B,0.7,B
Qwen2.5-7B,test/counting_and_probability/430.json,Counting & Probability,general,B,1.0,0.95,B,0.65,B,0.85,,,A
Qwen2.5-7B,test/counting_and_probability/159.json,Counting & Probability,general,A,0.0,0.8,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/counting_and_probability/230.json,Counting & Probability,general,A,0.0,0.95,B,0.9,B,0.95,B,1.0,B
Qwen2.5-7B,test/counting_and_probability/803.json,Counting & Probability,general,A,0.0,0.85,B,0.7,A,0.95,B,0.95,B
Qwen2.5-7B,test/counting_and_probability/181.json,Counting & Probability,general,A,1.0,0.8,A,0.6,A,0.85,A,0.7,B
Qwen2.5-7B,test/counting_and_probability/51.json,Counting & Probability,general,B,0.0,0.95,A,1.0,A,0.95,A,1.0,B
Qwen2.5-7B,test/counting_and_probability/508.json,Counting & Probability,general,A,1.0,0.9,A,0.85,A,0.95,A,1.0,B
Qwen2.5-7B,test/counting_and_probability/389.json,Counting & Probability,general,A,1.0,1.0,A,0.9,A,0.95,A,1.0,A
Qwen2.5-7B,test/counting_and_probability/765.json,Counting & Probability,general,A,1.0,0.9,A,0.85,A,1.0,,,B
Qwen2.5-7B,test/counting_and_probability/282.json,Counting & Probability,general,A,0.0,0.9,B,0.8,B,0.85,B,0.9,B
Qwen2.5-7B,test/counting_and_probability/71.json,Counting & Probability,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/counting_and_probability/894.json,Counting & Probability,general,A,0.0,0.95,B,1.0,B,1.0,B,1.0,A
Qwen2.5-7B,test/counting_and_probability/1009.json,Counting & Probability,general,A,0.0,1.0,B,0.9,A,0.95,B,0.95,A
Qwen2.5-7B,test/counting_and_probability/913.json,Counting & Probability,general,A,1.0,0.95,B,0.6,A,0.95,A,0.95,B
Qwen2.5-7B,test/counting_and_probability/25149.json,Counting & Probability,general,A,1.0,0.85,A,0.95,A,1.0,A,1.0,B
Qwen2.5-7B,test/counting_and_probability/339.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/counting_and_probability/870.json,Counting & Probability,general,A,0.0,0.9,B,0.85,B,0.7,B,0.9,B
Qwen2.5-7B,test/counting_and_probability/216.json,Counting & Probability,general,A,0.0,1.0,B,0.85,B,0.98,B,1.0,A
Qwen2.5-7B,test/counting_and_probability/737.json,Counting & Probability,general,A,0.0,0.95,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/counting_and_probability/116.json,Counting & Probability,general,A,0.0,0.95,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B,test/counting_and_probability/238.json,Counting & Probability,general,A,1.0,0.8,A,0.75,A,0.9,,,A
Qwen2.5-7B,test/counting_and_probability/1014.json,Counting & Probability,general,B,1.0,0.95,A,0.85,B,0.95,B,0.95,B
Qwen2.5-7B,test/counting_and_probability/14.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B,test/counting_and_probability/188.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.75,,,B
Qwen2.5-7B,test/counting_and_probability/761.json,Counting & Probability,general,A,0.0,0.85,B,0.75,B,0.95,B,1.0,B
Qwen2.5-7B,test/counting_and_probability/10.json,Counting & Probability,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,B
Qwen2.5-7B,test/counting_and_probability/731.json,Counting & Probability,general,A,0.0,0.9,B,0.7,B,0.6,,,A
Qwen2.5-7B,test/counting_and_probability/190.json,Counting & Probability,general,A,1.0,0.9,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B,test/counting_and_probability/1003.json,Counting & Probability,general,B,1.0,0.85,B,0.65,B,0.7,B,0.7,B
Qwen2.5-7B,test/counting_and_probability/199.json,Counting & Probability,general,A,1.0,0.95,B,0.65,A,0.95,,,B
Qwen2.5-7B-Instruct,test/precalculus/807.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/927.json,Precalculus,general,A,1.0,0.75,A,0.8,A,0.85,,,A
Qwen2.5-7B-Instruct,test/precalculus/1303.json,Precalculus,general,B,1.0,0.95,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/precalculus/990.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.98,A,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/1199.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.9,A,0.75,A
Qwen2.5-7B-Instruct,test/precalculus/779.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/precalculus/285.json,Precalculus,general,B,0.0,0.7,A,1.0,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/1105.json,Precalculus,general,A,1.0,0.85,A,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/675.json,Precalculus,general,A,1.0,0.95,A,0.98,A,0.98,A,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/1146.json,Precalculus,general,B,0.0,0.95,A,0.9,A,0.6,A,0.7,A
Qwen2.5-7B-Instruct,test/precalculus/1313.json,Precalculus,general,B,0.0,0.85,A,1.0,A,0.98,A,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/24313.json,Precalculus,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/precalculus/34.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/1300.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.98,A,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/44.json,Precalculus,general,A,0.0,0.85,A,0.6,B,0.7,,,B
Qwen2.5-7B-Instruct,test/precalculus/477.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/precalculus/43.json,Precalculus,general,B,1.0,0.95,B,0.85,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/precalculus/986.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/117.json,Precalculus,general,A,0.0,0.95,B,0.65,B,0.7,,,A
Qwen2.5-7B-Instruct,test/precalculus/697.json,Precalculus,general,A,0.0,0.95,B,0.65,B,0.85,B,0.8,B
Qwen2.5-7B-Instruct,test/precalculus/659.json,Precalculus,general,A,1.0,0.95,A,0.75,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/263.json,Precalculus,general,A,1.0,0.95,A,0.7,A,1.0,A,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/541.json,Precalculus,general,A,0.0,0.95,B,0.8,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/precalculus/190.json,Precalculus,general,B,1.0,0.85,B,0.85,B,0.85,B,0.8,A
Qwen2.5-7B-Instruct,test/precalculus/819.json,Precalculus,general,A,1.0,0.95,A,0.75,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/precalculus/1056.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,,,A
Qwen2.5-7B-Instruct,test/precalculus/441.json,Precalculus,general,A,0.0,0.95,B,0.75,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/989.json,Precalculus,general,A,1.0,0.85,A,0.6,B,0.7,A,0.6,B
Qwen2.5-7B-Instruct,test/precalculus/920.json,Precalculus,general,A,0.0,0.85,B,0.85,B,0.85,B,0.8,B
Qwen2.5-7B-Instruct,test/precalculus/452.json,Precalculus,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/580.json,Precalculus,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/768.json,Precalculus,general,A,1.0,0.85,B,0.85,A,0.85,,,A
Qwen2.5-7B-Instruct,test/precalculus/1172.json,Precalculus,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B-Instruct,test/precalculus/1201.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.85,,,A
Qwen2.5-7B-Instruct,test/precalculus/881.json,Precalculus,general,A,0.0,0.95,B,0.92,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/695.json,Precalculus,general,A,0.0,0.95,B,0.9,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/742.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/precalculus/801.json,Precalculus,general,A,0.0,0.95,B,0.75,B,0.7,,,B
Qwen2.5-7B-Instruct,test/precalculus/826.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,0.9,B
Qwen2.5-7B-Instruct,test/precalculus/1281.json,Precalculus,general,A,1.0,0.6,A,0.75,A,0.7,A,0.95,B
Qwen2.5-7B-Instruct,test/precalculus/96.json,Precalculus,general,A,1.0,0.7,A,1.0,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/1289.json,Precalculus,general,B,1.0,0.85,A,0.85,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/precalculus/902.json,Precalculus,general,B,1.0,0.5,B,0.7,A,0.6,,,A
Qwen2.5-7B-Instruct,test/precalculus/1291.json,Precalculus,general,A,1.0,0.95,A,0.6,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/precalculus/398.json,Precalculus,general,A,0.0,0.95,B,0.75,B,0.9,A,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/681.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/145.json,Precalculus,general,B,1.0,0.85,B,1.0,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/625.json,Precalculus,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/1202.json,Precalculus,general,A,0.0,0.95,B,0.65,B,0.95,,,B
Qwen2.5-7B-Instruct,test/precalculus/1133.json,Precalculus,general,A,1.0,0.95,A,0.65,A,0.5,A,0.6,B
Qwen2.5-7B-Instruct,test/precalculus/499.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/323.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.98,A,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/703.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.9,,,A
Qwen2.5-7B-Instruct,test/precalculus/1252.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/precalculus/1082.json,Precalculus,general,A,0.0,0.95,B,1.0,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/precalculus/356.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1994.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1197.json,Intermediate Algebra,general,B,1.0,0.85,B,0.95,B,0.9,,,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/134.json,Intermediate Algebra,general,B,0.0,0.85,A,0.85,B,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1000.json,Intermediate Algebra,general,A,0.0,0.95,B,1.0,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/607.json,Intermediate Algebra,general,B,0.0,0.85,B,0.8,A,0.9,A,0.9,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1388.json,Intermediate Algebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/428.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1454.json,Intermediate Algebra,general,B,0.0,0.85,A,1.0,A,0.95,A,0.9,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1217.json,Intermediate Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1168.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/956.json,Intermediate Algebra,general,A,0.0,0.85,B,0.65,B,0.85,,,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/279.json,Intermediate Algebra,general,A,1.0,0.95,A,0.6,A,0.95,A,0.75,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/207.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/623.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/47.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1849.json,Intermediate Algebra,general,A,1.0,0.95,A,0.8,A,0.9,A,0.7,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/2046.json,Intermediate Algebra,general,A,0.0,0.85,B,1.0,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/662.json,Intermediate Algebra,general,A,0.0,0.85,B,0.85,B,0.85,B,0.6,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/582.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.9,,,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/431.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/558.json,Intermediate Algebra,general,B,1.0,0.85,B,0.65,B,0.9,,,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/362.json,Intermediate Algebra,general,B,1.0,0.75,B,0.85,B,0.85,,,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/515.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/894.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.8,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/345.json,Intermediate Algebra,general,A,1.0,0.75,A,1.0,A,0.6,,,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1898.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/232.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/128.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1063.json,Intermediate Algebra,general,A,0.0,0.95,B,0.7,A,0.65,,,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1126.json,Intermediate Algebra,general,A,1.0,0.85,B,0.9,A,0.95,A,0.9,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/2022.json,Intermediate Algebra,general,A,1.0,0.95,A,0.7,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1151.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.9,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/966.json,Intermediate Algebra,general,A,1.0,1.0,A,0.98,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/964.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,B,0.95,B,0.6,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1410.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/991.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/183.json,Intermediate Algebra,general,A,1.0,0.75,A,0.85,A,0.65,A,0.8,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1422.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/2196.json,Intermediate Algebra,general,A,1.0,0.85,A,0.95,A,0.85,A,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/591.json,Intermediate Algebra,general,A,1.0,0.9,A,0.8,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1555.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.98,B,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1510.json,Intermediate Algebra,general,A,1.0,0.6,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/102.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/986.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1354.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.99,,,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1837.json,Intermediate Algebra,general,B,1.0,0.95,B,0.9,B,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/337.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1210.json,Intermediate Algebra,general,B,1.0,0.85,A,0.85,B,0.9,,,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1123.json,Intermediate Algebra,general,A,1.0,0.9,A,0.8,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/149.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1411.json,Intermediate Algebra,general,A,1.0,0.85,A,0.7,A,0.9,A,0.6,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/960.json,Intermediate Algebra,general,A,0.0,0.95,B,0.7,B,0.65,,,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1300.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/90.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.99,,,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/754.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/446.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1544.json,Intermediate Algebra,general,A,1.0,0.95,B,0.75,A,0.7,A,0.7,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1714.json,Intermediate Algebra,general,A,1.0,0.9,A,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/2152.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/117.json,Intermediate Algebra,general,A,1.0,0.85,A,0.9,A,0.8,A,0.8,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/190.json,Intermediate Algebra,general,B,1.0,0.9,B,0.7,B,0.6,A,0.7,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/776.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1566.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1572.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1166.json,Intermediate Algebra,general,A,0.0,0.85,B,0.7,B,0.6,A,0.7,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/860.json,Intermediate Algebra,general,A,0.0,0.95,B,0.7,B,0.85,B,0.6,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1407.json,Intermediate Algebra,general,B,1.0,0.85,B,0.95,B,0.9,,,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1405.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/690.json,Intermediate Algebra,general,A,0.0,0.95,B,0.55,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/955.json,Intermediate Algebra,general,B,1.0,0.95,B,0.8,B,0.75,A,0.7,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1992.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.98,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1111.json,Intermediate Algebra,general,A,1.0,0.95,B,0.6,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1791.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1806.json,Intermediate Algebra,general,A,1.0,0.85,A,0.85,A,0.75,A,0.8,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1797.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/2146.json,Intermediate Algebra,general,A,0.0,0.85,B,1.0,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/2015.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.6,A,0.4,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/121.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1014.json,Intermediate Algebra,general,A,0.0,0.85,B,0.95,B,0.85,,,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1462.json,Intermediate Algebra,general,A,0.0,0.85,B,0.7,B,0.85,B,0.4,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/199.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1779.json,Intermediate Algebra,general,B,1.0,0.95,A,0.75,B,0.85,,,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1102.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/834.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/158.json,Intermediate Algebra,general,B,1.0,0.85,B,1.0,B,0.98,B,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/752.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1279.json,Intermediate Algebra,general,A,0.0,0.95,B,0.65,A,0.6,B,0.8,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1467.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,B,0.6,,,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/101.json,Intermediate Algebra,general,A,1.0,0.95,A,0.98,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1365.json,Intermediate Algebra,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1350.json,Intermediate Algebra,general,A,0.0,0.85,B,0.65,B,0.95,,,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1930.json,Intermediate Algebra,general,A,1.0,0.85,A,0.95,A,0.85,A,0.9,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1981.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,1.0,B
Qwen2.5-7B-Instruct,test/intermediate_algebra/1232.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/intermediate_algebra/1508.json,Intermediate Algebra,general,A,1.0,0.85,A,0.95,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/algebra/2584.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.98,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1349.json,Algebra,general,A,0.0,0.9,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B-Instruct,test/algebra/2036.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1098.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1837.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/algebra/2193.json,Algebra,general,A,0.0,0.9,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2427.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.99,A,1.0,A
Qwen2.5-7B-Instruct,test/algebra/1072.json,Algebra,general,B,1.0,0.85,B,1.0,B,0.99,B,1.0,B
Qwen2.5-7B-Instruct,test/algebra/24.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2214.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.9,B
Qwen2.5-7B-Instruct,test/algebra/305.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1265.json,Algebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/algebra/187.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/algebra/769.json,Algebra,general,B,0.0,0.85,B,0.85,A,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/722.json,Algebra,general,B,1.0,0.95,A,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2046.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2253.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1004.json,Algebra,general,A,1.0,0.9,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1035.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.98,A,1.0,A
Qwen2.5-7B-Instruct,test/algebra/2700.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/algebra/893.json,Algebra,general,A,0.0,0.95,B,0.75,A,0.98,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/567.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/892.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2023.json,Algebra,general,A,0.0,0.75,B,1.0,A,0.9,B,1.0,B
Qwen2.5-7B-Instruct,test/algebra/873.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2058.json,Algebra,general,A,0.0,0.9,B,1.0,B,0.98,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2593.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2157.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/algebra/2251.json,Algebra,general,A,0.0,0.85,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1332.json,Algebra,general,A,1.0,0.85,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/972.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/algebra/2232.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/algebra/661.json,Algebra,general,B,1.0,0.85,A,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/246.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1519.json,Algebra,general,B,1.0,0.85,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/988.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2570.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.9,A
Qwen2.5-7B-Instruct,test/algebra/621.json,Algebra,general,A,0.0,0.65,B,1.0,B,0.99,B,1.0,A
Qwen2.5-7B-Instruct,test/algebra/1255.json,Algebra,general,B,1.0,0.9,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2517.json,Algebra,general,A,0.0,0.75,B,1.0,B,0.98,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/478.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/297.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.9,A,0.8,A
Qwen2.5-7B-Instruct,test/algebra/841.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/algebra/686.json,Algebra,general,A,1.0,0.85,A,1.0,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/algebra/351.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1275.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/algebra/1082.json,Algebra,general,A,1.0,0.95,A,0.7,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1214.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2199.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/733.json,Algebra,general,B,0.0,0.85,A,0.85,A,0.95,,,B
Qwen2.5-7B-Instruct,test/algebra/109.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1937.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/291.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2102.json,Algebra,general,A,1.0,0.95,A,0.75,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/907.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/864.json,Algebra,general,A,0.0,0.9,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2159.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1578.json,Algebra,general,A,1.0,0.95,A,0.55,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/975.json,Algebra,general,A,0.0,0.75,B,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1143.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/algebra/2626.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.99,A,1.0,B
Qwen2.5-7B-Instruct,test/algebra/1787.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1934.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.98,B,1.0,B
Qwen2.5-7B-Instruct,test/algebra/2064.json,Algebra,general,A,1.0,0.95,A,0.85,B,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/algebra/694.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/524.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/algebra/2551.json,Algebra,general,A,0.0,0.95,B,0.75,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/346.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1282.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.9,,,B
Qwen2.5-7B-Instruct,test/algebra/1184.json,Algebra,general,A,0.0,0.85,B,0.85,B,0.75,,,B
Qwen2.5-7B-Instruct,test/algebra/634.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/algebra/2486.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2257.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1842.json,Algebra,general,A,0.0,0.85,B,0.85,A,0.95,B,0.85,A
Qwen2.5-7B-Instruct,test/algebra/791.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/algebra/276.json,Algebra,general,B,1.0,0.95,B,0.95,A,0.98,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2735.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/425.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1936.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2176.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/algebra/509.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1457.json,Algebra,general,A,1.0,0.85,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2592.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/858.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1529.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1338.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/algebra/1547.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/529.json,Algebra,general,A,0.0,0.9,B,0.95,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/algebra/1078.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/251.json,Algebra,general,B,1.0,0.95,A,0.98,B,0.95,B,0.9,B
Qwen2.5-7B-Instruct,test/algebra/1199.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2264.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1303.json,Algebra,general,A,1.0,0.9,A,0.9,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/101.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/170.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/849.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/algebra/1031.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/853.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2277.json,Algebra,general,A,0.0,0.85,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/algebra/518.json,Algebra,general,A,0.0,0.9,B,0.75,A,0.95,B,0.9,B
Qwen2.5-7B-Instruct,test/algebra/114.json,Algebra,general,A,1.0,0.85,A,0.95,B,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1960.json,Algebra,general,B,1.0,0.85,B,0.75,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2680.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2391.json,Algebra,general,A,1.0,0.95,A,0.8,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/algebra/776.json,Algebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B-Instruct,test/algebra/1796.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/algebra/1339.json,Algebra,general,A,1.0,0.95,B,0.65,A,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2743.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2043.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/1553.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/algebra/2080.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1343.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/668.json,Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2430.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2789.json,Algebra,general,A,1.0,0.85,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/algebra/1814.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/2476.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2780.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/824.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/algebra/1425.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/algebra/224.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/435.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2470.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/algebra/2779.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/572.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/515.json,Number Theory,general,A,1.0,0.95,B,0.75,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/1032.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/737.json,Number Theory,general,A,1.0,0.9,A,0.9,A,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/864.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/627.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/45.json,Number Theory,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B-Instruct,test/number_theory/1055.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/number_theory/46.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/516.json,Number Theory,general,A,0.0,0.95,B,0.9,B,0.9,,,A
Qwen2.5-7B-Instruct,test/number_theory/357.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.9,B,0.8,A
Qwen2.5-7B-Instruct,test/number_theory/914.json,Number Theory,general,A,0.0,0.85,B,0.9,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/number_theory/847.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/753.json,Number Theory,general,A,1.0,0.85,A,1.0,A,0.98,A,1.0,A
Qwen2.5-7B-Instruct,test/number_theory/1257.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/156.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/612.json,Number Theory,general,A,0.0,0.9,B,0.9,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/931.json,Number Theory,general,A,1.0,0.95,B,0.55,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/521.json,Number Theory,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/598.json,Number Theory,general,A,1.0,0.95,A,0.75,B,0.9,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/978.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/838.json,Number Theory,general,A,0.0,0.95,B,0.7,B,1.0,A,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/149.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/1201.json,Number Theory,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/234.json,Number Theory,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/417.json,Number Theory,general,A,1.0,0.85,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/89.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.9,B
Qwen2.5-7B-Instruct,test/number_theory/183.json,Number Theory,general,A,0.0,0.95,B,0.8,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/1065.json,Number Theory,general,A,1.0,0.85,A,0.85,A,0.85,A,0.7,A
Qwen2.5-7B-Instruct,test/number_theory/466.json,Number Theory,general,A,1.0,0.95,B,0.75,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/number_theory/634.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/533.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/691.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/1287.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/631.json,Number Theory,general,B,1.0,0.85,B,1.0,B,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/number_theory/488.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/1172.json,Number Theory,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/203.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.9,A,0.8,A
Qwen2.5-7B-Instruct,test/number_theory/911.json,Number Theory,general,A,1.0,0.95,B,0.6,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/483.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/368.json,Number Theory,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/686.json,Number Theory,general,B,1.0,0.95,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/820.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.9,B,0.9,B
Qwen2.5-7B-Instruct,test/number_theory/109.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/427.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/1185.json,Number Theory,general,A,0.0,0.85,B,0.95,B,0.98,B,1.0,B
Qwen2.5-7B-Instruct,test/number_theory/928.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/132.json,Number Theory,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/769.json,Number Theory,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/1002.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/410.json,Number Theory,general,A,1.0,0.9,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/255.json,Number Theory,general,A,0.0,0.9,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/1000.json,Number Theory,general,A,1.0,0.85,A,0.9,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/number_theory/13.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/459.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/342.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/number_theory/679.json,Number Theory,general,A,0.0,0.85,B,0.85,B,0.9,A,0.7,B
Qwen2.5-7B-Instruct,test/number_theory/72.json,Number Theory,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/number_theory/22.json,Number Theory,general,B,0.0,0.75,A,0.85,A,1.0,A,1.0,A
Qwen2.5-7B-Instruct,test/number_theory/1128.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.9,A
Qwen2.5-7B-Instruct,test/number_theory/1090.json,Number Theory,general,A,1.0,0.95,A,0.95,A,1.0,A,0.9,A
Qwen2.5-7B-Instruct,test/number_theory/239.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1622.json,Prealgebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1139.json,Prealgebra,general,B,1.0,0.95,B,0.8,B,0.85,B,0.8,A
Qwen2.5-7B-Instruct,test/prealgebra/1840.json,Prealgebra,general,B,1.0,0.95,A,0.75,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1302.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/930.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/prealgebra/1558.json,Prealgebra,general,B,1.0,0.95,B,0.9,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1388.json,Prealgebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/951.json,Prealgebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/572.json,Prealgebra,general,A,0.0,0.85,B,0.85,B,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/1247.json,Prealgebra,general,A,0.0,0.95,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B-Instruct,test/prealgebra/1747.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1233.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/192.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/307.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/1761.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1646.json,Prealgebra,general,A,0.0,0.2,B,0.65,B,0.7,,,B
Qwen2.5-7B-Instruct,test/prealgebra/105.json,Prealgebra,general,B,1.0,0.85,B,0.99,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/1924.json,Prealgebra,general,A,1.0,0.85,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1804.json,Prealgebra,general,A,1.0,0.95,B,0.9,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/1733.json,Prealgebra,general,B,1.0,0.85,B,0.75,B,0.95,,,A
Qwen2.5-7B-Instruct,test/prealgebra/505.json,Prealgebra,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1686.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1807.json,Prealgebra,general,A,1.0,0.95,B,0.8,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1297.json,Prealgebra,general,A,1.0,0.95,B,0.8,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/1655.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1356.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1003.json,Prealgebra,general,A,0.0,0.85,B,0.7,B,0.95,,,B
Qwen2.5-7B-Instruct,test/prealgebra/1272.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1113.json,Prealgebra,general,B,1.0,0.85,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1908.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.7,B
Qwen2.5-7B-Instruct,test/prealgebra/1922.json,Prealgebra,general,B,1.0,0.9,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1907.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/2086.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.9,A
Qwen2.5-7B-Instruct,test/prealgebra/378.json,Prealgebra,general,A,0.0,0.95,B,0.9,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/prealgebra/1555.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1436.json,Prealgebra,general,B,1.0,0.95,B,0.9,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1961.json,Prealgebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
Qwen2.5-7B-Instruct,test/prealgebra/2057.json,Prealgebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/153.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/874.json,Prealgebra,general,A,1.0,0.65,A,0.7,B,0.7,,,A
Qwen2.5-7B-Instruct,test/prealgebra/1251.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1458.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1995.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1317.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1742.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/993.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1834.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1512.json,Prealgebra,general,A,0.0,0.95,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B-Instruct,test/prealgebra/260.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.85,A,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1787.json,Prealgebra,general,A,0.0,0.9,A,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1044.json,Prealgebra,general,A,1.0,0.95,A,0.8,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/prealgebra/465.json,Prealgebra,general,A,0.0,0.85,B,0.95,B,0.98,B,0.9,A
Qwen2.5-7B-Instruct,test/prealgebra/1423.json,Prealgebra,general,A,0.0,0.95,B,0.75,B,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/prealgebra/954.json,Prealgebra,general,A,0.0,0.85,B,0.95,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/1973.json,Prealgebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1730.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1238.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1353.json,Prealgebra,general,B,1.0,0.85,B,0.95,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1187.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,,,B
Qwen2.5-7B-Instruct,test/prealgebra/1743.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1865.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/1298.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/2066.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/631.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.85,B
Qwen2.5-7B-Instruct,test/prealgebra/977.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1991.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.9,A
Qwen2.5-7B-Instruct,test/prealgebra/1784.json,Prealgebra,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1572.json,Prealgebra,general,B,0.0,0.95,B,0.85,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/65.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1227.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/2019.json,Prealgebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1640.json,Prealgebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/prealgebra/2037.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/996.json,Prealgebra,general,A,1.0,0.95,B,0.8,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/805.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/prealgebra/914.json,Prealgebra,general,A,0.0,0.95,B,0.75,B,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1114.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/prealgebra/846.json,Prealgebra,general,A,1.0,0.85,A,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/prealgebra/1930.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1252.json,Prealgebra,general,A,0.0,0.85,B,1.0,B,1.0,B,1.0,B
Qwen2.5-7B-Instruct,test/prealgebra/1203.json,Prealgebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/prealgebra/1128.json,Prealgebra,general,A,1.0,0.95,A,0.85,B,0.9,A,0.8,B
Qwen2.5-7B-Instruct,test/geometry/248.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/geometry/434.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.9,,,B
Qwen2.5-7B-Instruct,test/geometry/967.json,Geometry,general,A,1.0,0.9,A,0.85,A,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/geometry/627.json,Geometry,general,A,1.0,0.85,A,0.98,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/geometry/178.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Qwen2.5-7B-Instruct,test/geometry/456.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/geometry/353.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/geometry/183.json,Geometry,general,A,0.0,0.85,B,0.6,A,0.5,,,A
Qwen2.5-7B-Instruct,test/geometry/283.json,Geometry,general,A,0.0,0.85,B,1.0,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/geometry/1140.json,Geometry,general,A,0.0,0.95,B,0.9,B,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/geometry/172.json,Geometry,general,A,1.0,0.75,A,0.85,A,0.85,A,0.9,B
Qwen2.5-7B-Instruct,test/geometry/880.json,Geometry,general,A,0.0,0.95,B,0.75,B,0.85,,,A
Qwen2.5-7B-Instruct,test/geometry/802.json,Geometry,general,A,0.0,0.95,B,0.95,A,0.95,B,0.9,A
Qwen2.5-7B-Instruct,test/geometry/65.json,Geometry,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/geometry/702.json,Geometry,general,A,1.0,0.95,A,0.75,B,0.75,,,B
Qwen2.5-7B-Instruct,test/geometry/221.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/geometry/547.json,Geometry,general,A,0.0,0.95,B,0.75,B,0.75,,,A
Qwen2.5-7B-Instruct,test/geometry/229.json,Geometry,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
Qwen2.5-7B-Instruct,test/geometry/254.json,Geometry,general,A,1.0,0.9,A,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/geometry/473.json,Geometry,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/geometry/347.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/geometry/483.json,Geometry,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/geometry/826.json,Geometry,general,A,1.0,0.85,A,0.65,B,0.6,,,B
Qwen2.5-7B-Instruct,test/geometry/226.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,0.9,B
Qwen2.5-7B-Instruct,test/geometry/686.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.9,A,0.8,B
Qwen2.5-7B-Instruct,test/geometry/1097.json,Geometry,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,A
Qwen2.5-7B-Instruct,test/geometry/965.json,Geometry,general,A,1.0,0.95,B,0.75,A,0.7,A,0.7,A
Qwen2.5-7B-Instruct,test/geometry/711.json,Geometry,general,A,0.0,0.85,A,0.6,B,0.85,,,A
Qwen2.5-7B-Instruct,test/geometry/1108.json,Geometry,general,A,0.0,0.95,B,0.8,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/geometry/947.json,Geometry,general,A,0.0,0.85,B,0.85,B,0.85,,,B
Qwen2.5-7B-Instruct,test/geometry/465.json,Geometry,general,B,1.0,0.85,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/geometry/73.json,Geometry,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/geometry/106.json,Geometry,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/geometry/846.json,Geometry,general,A,1.0,0.95,A,0.75,A,0.85,,,A
Qwen2.5-7B-Instruct,test/geometry/538.json,Geometry,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Qwen2.5-7B-Instruct,test/geometry/795.json,Geometry,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/geometry/817.json,Geometry,general,B,1.0,0.95,B,0.75,B,0.9,,,B
Qwen2.5-7B-Instruct,test/geometry/843.json,Geometry,general,A,1.0,0.95,A,0.9,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/geometry/477.json,Geometry,general,A,0.0,0.95,A,0.85,B,0.95,B,0.9,B
Qwen2.5-7B-Instruct,test/geometry/561.json,Geometry,general,A,1.0,1.0,A,0.85,A,0.75,,,B
Qwen2.5-7B-Instruct,test/geometry/615.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/counting_and_probability/525.json,Counting & Probability,general,A,0.0,0.85,B,0.85,B,0.9,,,A
Qwen2.5-7B-Instruct,test/counting_and_probability/666.json,Counting & Probability,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/counting_and_probability/134.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/119.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/counting_and_probability/1114.json,Counting & Probability,general,A,1.0,0.85,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/377.json,Counting & Probability,general,A,0.0,0.85,B,0.95,B,0.95,B,1.0,B
Qwen2.5-7B-Instruct,test/counting_and_probability/23957.json,Counting & Probability,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/counting_and_probability/1060.json,Counting & Probability,general,A,1.0,0.85,A,1.0,A,1.0,A,1.0,A
Qwen2.5-7B-Instruct,test/counting_and_probability/430.json,Counting & Probability,general,A,1.0,0.85,A,0.85,A,0.85,,,B
Qwen2.5-7B-Instruct,test/counting_and_probability/159.json,Counting & Probability,general,A,1.0,0.95,A,0.75,A,0.95,A,1.0,A
Qwen2.5-7B-Instruct,test/counting_and_probability/230.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/803.json,Counting & Probability,general,A,1.0,0.95,B,0.8,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/181.json,Counting & Probability,general,B,1.0,0.95,B,0.75,B,0.75,,,A
Qwen2.5-7B-Instruct,test/counting_and_probability/51.json,Counting & Probability,general,A,1.0,0.9,A,1.0,A,0.98,A,1.0,A
Qwen2.5-7B-Instruct,test/counting_and_probability/508.json,Counting & Probability,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,A
Qwen2.5-7B-Instruct,test/counting_and_probability/389.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.98,B,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/765.json,Counting & Probability,general,B,1.0,0.95,B,0.95,B,0.98,B,0.9,B
Qwen2.5-7B-Instruct,test/counting_and_probability/282.json,Counting & Probability,general,A,0.0,0.95,B,0.6,B,0.5,,,A
Qwen2.5-7B-Instruct,test/counting_and_probability/71.json,Counting & Probability,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/894.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,B,0.9,B
Qwen2.5-7B-Instruct,test/counting_and_probability/1009.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/913.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/25149.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/339.json,Counting & Probability,general,A,1.0,0.95,A,0.8,A,0.95,A,0.95,B
Qwen2.5-7B-Instruct,test/counting_and_probability/870.json,Counting & Probability,general,A,0.0,0.95,B,1.0,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/counting_and_probability/216.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.98,A,1.0,B
Qwen2.5-7B-Instruct,test/counting_and_probability/737.json,Counting & Probability,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/counting_and_probability/116.json,Counting & Probability,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/238.json,Counting & Probability,general,B,0.0,0.85,A,1.0,A,0.99,A,1.0,A
Qwen2.5-7B-Instruct,test/counting_and_probability/1014.json,Counting & Probability,general,A,0.0,0.95,B,0.9,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/counting_and_probability/14.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-7B-Instruct,test/counting_and_probability/188.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.9,,,B
Qwen2.5-7B-Instruct,test/counting_and_probability/761.json,Counting & Probability,general,A,1.0,0.95,A,0.9,A,0.98,B,0.9,B
Qwen2.5-7B-Instruct,test/counting_and_probability/10.json,Counting & Probability,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Qwen2.5-7B-Instruct,test/counting_and_probability/731.json,Counting & Probability,general,B,0.0,0.85,A,0.75,A,0.7,A,0.8,A
Qwen2.5-7B-Instruct,test/counting_and_probability/190.json,Counting & Probability,general,A,0.0,0.95,B,0.95,B,0.98,B,1.0,A
Qwen2.5-7B-Instruct,test/counting_and_probability/1003.json,Counting & Probability,general,A,0.0,0.85,B,0.6,B,0.9,,,A
Qwen2.5-7B-Instruct,test/counting_and_probability/199.json,Counting & Probability,general,B,0.0,0.9,B,0.85,A,0.95,,,A
Qwen2.5-Math-7B-Instruct,test/precalculus/807.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/927.json,Precalculus,general,B,0.0,0.85,A,0.95,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/1303.json,Precalculus,general,A,1.0,1.0,A,0.6,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/990.json,Precalculus,general,A,0.0,0.95,B,0.6,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/1199.json,Precalculus,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/779.json,Precalculus,general,A,0.0,0.85,B,0.95,A,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/285.json,Precalculus,general,A,0.0,1.0,B,0.85,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/1105.json,Precalculus,general,B,1.0,1.0,B,0.6,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/675.json,Precalculus,general,Tie,0.5,0.5,B,0.75,B,0.95,,,A
Qwen2.5-Math-7B-Instruct,test/precalculus/1146.json,Precalculus,general,A,1.0,0.9,A,0.85,B,0.85,,,A
Qwen2.5-Math-7B-Instruct,test/precalculus/1313.json,Precalculus,general,A,1.0,0.8,A,1.0,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/24313.json,Precalculus,general,A,1.0,1.0,A,0.5,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/34.json,Precalculus,general,A,1.0,0.75,A,0.8,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/1300.json,Precalculus,general,Tie,0.5,0.5,B,0.8,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/44.json,Precalculus,general,Tie,0.5,0.5,B,1.0,B,0.98,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/477.json,Precalculus,general,A,0.0,1.0,B,0.6,A,0.99,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/43.json,Precalculus,general,Tie,0.5,0.5,B,0.7,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/986.json,Precalculus,general,Tie,0.5,0.5,B,0.95,A,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/117.json,Precalculus,general,A,0.0,0.85,B,0.75,B,0.85,,,A
Qwen2.5-Math-7B-Instruct,test/precalculus/697.json,Precalculus,general,Tie,0.5,0.5,B,0.65,A,0.85,B,0.75,A
Qwen2.5-Math-7B-Instruct,test/precalculus/659.json,Precalculus,general,B,1.0,1.0,B,0.95,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/precalculus/263.json,Precalculus,general,A,1.0,0.9,B,0.55,A,1.0,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/541.json,Precalculus,general,A,1.0,0.9,B,0.85,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/precalculus/190.json,Precalculus,general,A,1.0,0.85,A,0.75,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/819.json,Precalculus,general,A,1.0,0.9,A,0.55,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/1056.json,Precalculus,general,A,1.0,1.0,A,0.5,A,1.0,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/441.json,Precalculus,general,A,1.0,0.9,B,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/989.json,Precalculus,general,A,0.0,0.9,B,0.7,B,0.6,A,0.8,B
Qwen2.5-Math-7B-Instruct,test/precalculus/920.json,Precalculus,general,B,0.0,0.9,A,1.0,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/452.json,Precalculus,general,A,0.0,0.85,B,0.6,B,0.99,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/580.json,Precalculus,general,A,1.0,1.0,A,0.5,A,1.0,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/768.json,Precalculus,general,Tie,0.5,0.5,B,1.0,B,0.99,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/1172.json,Precalculus,general,A,1.0,0.9,A,0.85,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/1201.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/881.json,Precalculus,general,A,0.0,0.85,B,0.51,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/695.json,Precalculus,general,A,0.0,0.8,B,0.7,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/742.json,Precalculus,general,A,0.0,0.9,B,0.85,B,0.95,,,B
Qwen2.5-Math-7B-Instruct,test/precalculus/801.json,Precalculus,general,Tie,0.5,0.5,B,0.85,B,0.85,,,A
Qwen2.5-Math-7B-Instruct,test/precalculus/826.json,Precalculus,general,A,0.0,1.0,B,0.75,B,0.9,B,0.85,A
Qwen2.5-Math-7B-Instruct,test/precalculus/1281.json,Precalculus,general,B,0.0,0.9,A,0.65,A,0.95,,,B
Qwen2.5-Math-7B-Instruct,test/precalculus/96.json,Precalculus,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/1289.json,Precalculus,general,A,0.0,0.85,B,0.51,A,0.95,B,0.6,A
Qwen2.5-Math-7B-Instruct,test/precalculus/902.json,Precalculus,general,A,0.0,0.5,B,0.75,B,0.75,B,0.8,A
Qwen2.5-Math-7B-Instruct,test/precalculus/1291.json,Precalculus,general,A,0.0,1.0,B,0.9,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/398.json,Precalculus,general,B,1.0,1.0,B,0.95,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/681.json,Precalculus,general,A,0.0,0.9,B,0.9,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/145.json,Precalculus,general,A,0.0,1.0,B,0.85,B,0.95,B,0.9,B
Qwen2.5-Math-7B-Instruct,test/precalculus/625.json,Precalculus,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/1202.json,Precalculus,general,Tie,0.5,0.5,A,0.95,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/precalculus/1133.json,Precalculus,general,A,1.0,0.8,A,0.7,A,0.85,,,A
Qwen2.5-Math-7B-Instruct,test/precalculus/499.json,Precalculus,general,A,0.0,0.9,B,0.85,A,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/precalculus/323.json,Precalculus,general,A,0.0,0.85,B,0.6,B,0.9,,,B
Qwen2.5-Math-7B-Instruct,test/precalculus/703.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/1252.json,Precalculus,general,B,1.0,1.0,B,0.85,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/precalculus/1082.json,Precalculus,general,B,1.0,0.85,B,0.98,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/precalculus/356.json,Precalculus,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1994.json,Intermediate Algebra,general,A,1.0,1.0,A,0.6,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1197.json,Intermediate Algebra,general,A,1.0,0.9,A,0.6,B,0.5,A,0.6,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/134.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1000.json,Intermediate Algebra,general,B,1.0,1.0,B,0.51,A,0.99,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/607.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.98,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1388.json,Intermediate Algebra,general,Tie,0.5,0.5,B,1.0,B,0.98,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/428.json,Intermediate Algebra,general,B,1.0,0.8,B,0.95,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1454.json,Intermediate Algebra,general,B,0.0,0.9,B,0.6,A,0.6,A,0.8,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1217.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.95,A,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1168.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/956.json,Intermediate Algebra,general,B,1.0,1.0,B,0.65,B,0.6,A,0.7,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,1.0,1.0,B,0.5,A,1.0,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/279.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.51,A,1.0,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/207.json,Intermediate Algebra,general,B,0.0,1.0,B,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/623.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.55,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/47.json,Intermediate Algebra,general,A,1.0,0.85,B,0.9,A,0.98,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1849.json,Intermediate Algebra,general,B,1.0,1.0,B,0.75,B,0.85,B,0.8,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/2046.json,Intermediate Algebra,general,B,0.0,0.9,A,0.85,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/662.json,Intermediate Algebra,general,B,1.0,0.75,B,0.85,B,0.9,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/582.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,B,0.9,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/431.json,Intermediate Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/558.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,B,0.65,B,0.7,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/362.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.65,A,0.85,A,0.7,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/515.json,Intermediate Algebra,general,A,0.0,0.9,B,0.75,B,0.98,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/894.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.95,B,0.92,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/345.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.75,B,0.95,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1898.json,Intermediate Algebra,general,A,1.0,0.9,B,0.55,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/232.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.5,A,0.98,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/128.json,Intermediate Algebra,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1063.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,B,0.85,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1126.json,Intermediate Algebra,general,B,1.0,0.85,B,0.9,A,0.98,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/2022.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1151.json,Intermediate Algebra,general,A,1.0,0.9,A,0.9,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/966.json,Intermediate Algebra,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/964.json,Intermediate Algebra,general,A,0.0,0.85,B,0.65,B,0.95,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1410.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.55,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/991.json,Intermediate Algebra,general,A,1.0,0.85,A,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/183.json,Intermediate Algebra,general,B,1.0,1.0,B,0.85,B,0.6,A,0.6,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1422.json,Intermediate Algebra,general,B,0.0,0.9,A,0.55,A,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/2196.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.75,A,0.9,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/591.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.51,A,1.0,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1555.json,Intermediate Algebra,general,B,0.0,0.8,B,0.51,A,0.99,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1510.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.95,B,0.65,A,0.7,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/102.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.55,A,0.99,,,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/986.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1354.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1837.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,A,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/337.json,Intermediate Algebra,general,A,1.0,0.9,A,0.55,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1210.json,Intermediate Algebra,general,B,0.0,1.0,B,0.65,A,0.9,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1123.json,Intermediate Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/149.json,Intermediate Algebra,general,B,1.0,0.5,B,0.51,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1411.json,Intermediate Algebra,general,B,1.0,0.8,B,0.7,B,0.75,B,0.7,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/960.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.65,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1300.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,A,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/90.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.8,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/754.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/446.json,Intermediate Algebra,general,A,1.0,0.9,B,0.52,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1544.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.65,B,0.85,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1714.json,Intermediate Algebra,general,A,0.0,1.0,B,0.8,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/2152.json,Intermediate Algebra,general,A,0.0,0.8,B,0.85,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/117.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/190.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.85,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/776.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.9,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1566.json,Intermediate Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1572.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.51,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1166.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.85,B,0.85,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/860.json,Intermediate Algebra,general,A,0.0,0.9,B,1.0,B,0.98,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1407.json,Intermediate Algebra,general,Tie,0.5,0.5,A,1.0,A,0.98,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1405.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/690.json,Intermediate Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/955.json,Intermediate Algebra,general,A,0.0,1.0,B,0.75,B,0.95,B,0.8,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1992.json,Intermediate Algebra,general,A,1.0,0.9,B,0.85,A,0.85,,,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1111.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1791.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.99,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1806.json,Intermediate Algebra,general,B,0.0,0.9,B,0.7,A,0.85,A,0.8,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1797.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/2146.json,Intermediate Algebra,general,Tie,0.5,0.5,A,1.0,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/2015.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,B,0.95,,,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/121.json,Intermediate Algebra,general,A,0.0,1.0,B,0.55,B,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1014.json,Intermediate Algebra,general,B,0.0,1.0,A,1.0,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1462.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.75,B,0.85,B,0.9,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/199.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.9,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1779.json,Intermediate Algebra,general,A,1.0,0.85,B,0.7,A,0.9,A,0.7,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1102.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/834.json,Intermediate Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/158.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/752.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1279.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.95,,,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1467.json,Intermediate Algebra,general,A,0.0,1.0,A,0.6,B,0.75,,,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/101.json,Intermediate Algebra,general,A,1.0,0.9,B,0.95,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1365.json,Intermediate Algebra,general,B,0.0,0.9,A,0.8,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1350.json,Intermediate Algebra,general,B,1.0,0.9,B,0.75,B,0.95,,,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1930.json,Intermediate Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.9,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1981.json,Intermediate Algebra,general,A,1.0,0.95,B,0.55,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1232.json,Intermediate Algebra,general,B,1.0,0.9,B,0.95,B,0.99,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/intermediate_algebra/1508.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,B,0.9,A
Qwen2.5-Math-7B-Instruct,test/algebra/2584.json,Algebra,general,B,0.0,1.0,A,0.51,A,1.0,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1349.json,Algebra,general,A,0.0,1.0,B,0.6,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2036.json,Algebra,general,A,0.0,1.0,B,0.6,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1098.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.8,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/1837.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2193.json,Algebra,general,A,0.0,1.0,B,0.9,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2427.json,Algebra,general,Tie,0.5,0.5,B,0.51,A,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/1072.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/24.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2214.json,Algebra,general,Tie,0.5,0.5,B,0.9,A,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/305.json,Algebra,general,A,1.0,0.8,A,0.5,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1265.json,Algebra,general,Tie,0.5,0.5,A,0.51,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/187.json,Algebra,general,B,0.0,1.0,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/769.json,Algebra,general,Tie,0.5,0.5,A,0.9,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/722.json,Algebra,general,B,1.0,0.9,B,0.75,B,0.95,A,0.7,B
Qwen2.5-Math-7B-Instruct,test/algebra/2046.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2253.json,Algebra,general,A,1.0,1.0,A,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1004.json,Algebra,general,A,1.0,0.95,A,0.51,A,0.95,A,0.6,B
Qwen2.5-Math-7B-Instruct,test/algebra/1035.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/2700.json,Algebra,general,Tie,0.5,0.5,B,0.51,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/893.json,Algebra,general,A,1.0,1.0,B,0.55,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/567.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/892.json,Algebra,general,B,1.0,1.0,B,0.85,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2023.json,Algebra,general,Tie,0.5,0.5,B,0.5,A,0.99,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/873.json,Algebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/2058.json,Algebra,general,A,1.0,0.95,A,0.85,A,1.0,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2593.json,Algebra,general,A,1.0,1.0,B,0.51,A,1.0,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/2157.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/2251.json,Algebra,general,A,1.0,0.9,B,0.51,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1332.json,Algebra,general,A,1.0,1.0,B,0.51,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/972.json,Algebra,general,Tie,0.5,0.5,A,0.51,A,0.99,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/2232.json,Algebra,general,Tie,0.5,0.5,B,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/661.json,Algebra,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/246.json,Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/1519.json,Algebra,general,A,0.0,0.75,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/988.json,Algebra,general,A,0.0,0.9,B,0.55,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2570.json,Algebra,general,B,1.0,1.0,B,0.75,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/621.json,Algebra,general,A,0.0,0.95,B,0.6,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/1255.json,Algebra,general,A,1.0,1.0,A,0.55,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2517.json,Algebra,general,Tie,0.5,0.5,A,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/478.json,Algebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/297.json,Algebra,general,A,0.0,0.95,B,1.0,B,0.98,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/841.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.98,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/686.json,Algebra,general,A,1.0,0.9,A,0.9,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/351.json,Algebra,general,Tie,0.5,0.5,B,0.9,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1275.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/1082.json,Algebra,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1214.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/2199.json,Algebra,general,A,1.0,1.0,A,0.5,A,0.99,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/733.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/109.json,Algebra,general,Tie,0.5,0.5,B,0.9,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1937.json,Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/291.json,Algebra,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2102.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/907.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/864.json,Algebra,general,B,1.0,0.8,B,0.98,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/2159.json,Algebra,general,B,1.0,0.85,B,0.95,B,0.98,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/1578.json,Algebra,general,Tie,0.5,0.5,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/975.json,Algebra,general,Tie,0.5,0.5,B,0.85,A,1.0,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1143.json,Algebra,general,B,0.0,0.95,B,0.51,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2626.json,Algebra,general,Tie,0.5,0.5,B,0.5,B,0.9,A,0.8,A
Qwen2.5-Math-7B-Instruct,test/algebra/1787.json,Algebra,general,Tie,0.5,0.5,B,0.9,B,0.98,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1934.json,Algebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2064.json,Algebra,general,A,0.0,0.75,B,1.0,B,1.0,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/694.json,Algebra,general,A,1.0,0.85,A,0.55,A,1.0,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/524.json,Algebra,general,B,1.0,0.9,B,0.85,A,0.99,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2551.json,Algebra,general,B,0.0,1.0,A,0.5,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/346.json,Algebra,general,A,1.0,1.0,A,0.6,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1282.json,Algebra,general,A,1.0,0.95,A,0.5,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/1184.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.9,A
Qwen2.5-Math-7B-Instruct,test/algebra/634.json,Algebra,general,A,1.0,0.85,A,0.9,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2486.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.99,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2257.json,Algebra,general,Tie,0.5,0.5,B,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1842.json,Algebra,general,B,0.0,1.0,A,0.85,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/791.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/276.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2735.json,Algebra,general,Tie,0.5,0.5,B,0.85,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/425.json,Algebra,general,A,1.0,0.85,A,0.51,A,0.99,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/1936.json,Algebra,general,Tie,0.5,0.5,B,0.51,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2176.json,Algebra,general,Tie,0.5,0.5,A,0.75,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/509.json,Algebra,general,A,1.0,0.85,B,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1457.json,Algebra,general,Tie,0.5,0.5,B,0.85,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2592.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/858.json,Algebra,general,A,1.0,1.0,A,0.8,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1529.json,Algebra,general,Tie,0.5,0.5,B,0.6,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1338.json,Algebra,general,B,1.0,0.9,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1547.json,Algebra,general,A,1.0,0.8,A,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/529.json,Algebra,general,B,1.0,0.85,B,0.55,A,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/1078.json,Algebra,general,A,1.0,0.9,A,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/251.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1199.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2264.json,Algebra,general,A,0.0,1.0,B,0.55,B,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1303.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/101.json,Algebra,general,Tie,0.5,0.5,A,0.51,A,0.99,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/170.json,Algebra,general,A,1.0,1.0,A,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/849.json,Algebra,general,B,0.0,1.0,B,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1031.json,Algebra,general,Tie,0.5,0.5,B,0.51,A,1.0,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/853.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2277.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/518.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/114.json,Algebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1960.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2680.json,Algebra,general,B,0.0,0.9,B,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2391.json,Algebra,general,A,1.0,0.9,A,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/776.json,Algebra,general,Tie,0.5,0.5,B,0.55,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1796.json,Algebra,general,A,1.0,1.0,B,0.51,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/1339.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.99,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/2743.json,Algebra,general,Tie,0.5,0.5,B,0.51,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2043.json,Algebra,general,A,0.0,0.95,B,0.65,A,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/1553.json,Algebra,general,A,1.0,0.8,A,1.0,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/2080.json,Algebra,general,A,0.0,1.0,B,0.55,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/1343.json,Algebra,general,A,0.0,0.95,B,0.51,A,1.0,B,0.9,A
Qwen2.5-Math-7B-Instruct,test/algebra/668.json,Algebra,general,A,0.0,0.85,B,0.85,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2430.json,Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/2789.json,Algebra,general,Tie,0.5,0.5,B,0.55,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/algebra/1814.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2476.json,Algebra,general,Tie,0.5,0.5,A,0.6,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/algebra/2780.json,Algebra,general,Tie,0.5,0.5,B,0.51,A,0.99,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/824.json,Algebra,general,A,1.0,0.9,A,0.55,A,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/algebra/1425.json,Algebra,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/224.json,Algebra,general,B,1.0,0.9,B,0.51,A,0.99,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/435.json,Algebra,general,A,1.0,0.9,A,0.51,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2470.json,Algebra,general,Tie,0.5,0.5,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/algebra/2779.json,Algebra,general,A,1.0,0.95,B,0.52,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/572.json,Number Theory,general,A,0.0,0.9,B,0.55,A,0.95,B,0.9,A
Qwen2.5-Math-7B-Instruct,test/number_theory/515.json,Number Theory,general,A,1.0,0.9,A,0.9,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/1032.json,Number Theory,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/number_theory/737.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/864.json,Number Theory,general,B,0.0,0.85,B,0.85,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/627.json,Number Theory,general,B,1.0,0.9,B,0.85,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/45.json,Number Theory,general,B,1.0,1.0,B,0.85,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/number_theory/1055.json,Number Theory,general,Tie,0.5,0.5,A,0.51,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/46.json,Number Theory,general,A,1.0,0.8,A,0.9,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/516.json,Number Theory,general,B,0.0,0.9,A,0.9,A,0.95,B,0.9,A
Qwen2.5-Math-7B-Instruct,test/number_theory/357.json,Number Theory,general,Tie,0.5,0.5,B,0.51,A,0.95,A,0.9,A
Qwen2.5-Math-7B-Instruct,test/number_theory/914.json,Number Theory,general,B,0.0,0.9,A,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/847.json,Number Theory,general,Tie,0.5,0.5,A,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/753.json,Number Theory,general,A,1.0,0.85,A,0.6,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/1257.json,Number Theory,general,Tie,0.5,0.5,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/156.json,Number Theory,general,A,0.0,1.0,B,0.51,A,0.99,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/612.json,Number Theory,general,A,0.0,1.0,B,0.6,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/931.json,Number Theory,general,B,1.0,0.9,B,0.51,A,0.99,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/521.json,Number Theory,general,B,1.0,0.85,B,0.9,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/598.json,Number Theory,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/978.json,Number Theory,general,A,1.0,1.0,B,0.51,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/number_theory/838.json,Number Theory,general,A,1.0,0.95,A,0.65,B,0.95,A,0.8,B
Qwen2.5-Math-7B-Instruct,test/number_theory/149.json,Number Theory,general,A,0.0,0.8,B,0.6,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/1201.json,Number Theory,general,Tie,0.5,0.5,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/234.json,Number Theory,general,A,0.0,0.85,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/417.json,Number Theory,general,A,1.0,0.8,A,1.0,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/number_theory/89.json,Number Theory,general,B,0.0,0.55,A,0.85,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/183.json,Number Theory,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/1065.json,Number Theory,general,A,1.0,1.0,A,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/466.json,Number Theory,general,A,1.0,0.8,B,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/634.json,Number Theory,general,A,0.0,0.8,B,0.85,B,0.98,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/533.json,Number Theory,general,B,0.0,1.0,A,1.0,A,0.9,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/691.json,Number Theory,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/1287.json,Number Theory,general,Tie,0.5,0.5,B,0.85,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/631.json,Number Theory,general,A,0.0,0.9,B,0.9,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/488.json,Number Theory,general,B,0.0,1.0,A,0.85,A,0.85,A,0.8,B
Qwen2.5-Math-7B-Instruct,test/number_theory/1172.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.98,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/203.json,Number Theory,general,B,0.0,0.95,A,1.0,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/number_theory/911.json,Number Theory,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/483.json,Number Theory,general,A,1.0,0.8,A,0.51,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/368.json,Number Theory,general,Tie,0.5,0.5,B,0.55,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/686.json,Number Theory,general,B,1.0,0.85,B,0.85,B,0.95,A,0.9,A
Qwen2.5-Math-7B-Instruct,test/number_theory/820.json,Number Theory,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/109.json,Number Theory,general,B,1.0,0.95,B,0.55,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/427.json,Number Theory,general,A,1.0,0.9,A,0.65,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/1185.json,Number Theory,general,Tie,0.5,0.5,B,0.55,A,0.95,A,0.9,A
Qwen2.5-Math-7B-Instruct,test/number_theory/928.json,Number Theory,general,A,1.0,1.0,A,0.51,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/132.json,Number Theory,general,B,1.0,1.0,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/769.json,Number Theory,general,A,0.0,1.0,B,0.51,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/1002.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.99,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/410.json,Number Theory,general,Tie,0.5,0.5,B,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/255.json,Number Theory,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/1000.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.98,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/13.json,Number Theory,general,Tie,0.5,0.5,A,0.5,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/459.json,Number Theory,general,A,1.0,1.0,A,0.51,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/342.json,Number Theory,general,A,0.0,0.95,B,0.75,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/679.json,Number Theory,general,B,0.0,1.0,A,0.95,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/72.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/number_theory/22.json,Number Theory,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/number_theory/1128.json,Number Theory,general,Tie,0.5,0.5,B,0.85,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/number_theory/1090.json,Number Theory,general,A,1.0,1.0,A,0.95,A,1.0,B,0.9,B
Qwen2.5-Math-7B-Instruct,test/number_theory/239.json,Number Theory,general,Tie,0.5,0.5,B,1.0,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1622.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1139.json,Prealgebra,general,Tie,0.5,0.5,B,0.8,B,0.7,,,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1840.json,Prealgebra,general,A,1.0,0.6,B,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1302.json,Prealgebra,general,A,0.0,0.8,B,0.95,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/930.json,Prealgebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1558.json,Prealgebra,general,A,0.0,1.0,B,0.6,A,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1388.json,Prealgebra,general,A,0.0,0.85,B,0.51,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/951.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/572.json,Prealgebra,general,Tie,0.5,0.5,B,0.95,B,0.95,A,0.9,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1247.json,Prealgebra,general,A,1.0,0.9,B,0.51,A,1.0,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1747.json,Prealgebra,general,A,1.0,0.95,A,0.55,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1233.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/192.json,Prealgebra,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/307.json,Prealgebra,general,Tie,0.5,0.5,B,0.75,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1761.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1646.json,Prealgebra,general,A,0.0,0.9,B,0.6,B,0.75,,,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/105.json,Prealgebra,general,A,0.0,1.0,B,0.65,A,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1924.json,Prealgebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1804.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,1.0,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1733.json,Prealgebra,general,A,0.0,0.85,B,0.95,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/505.json,Prealgebra,general,A,0.0,0.8,B,0.6,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1686.json,Prealgebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1807.json,Prealgebra,general,A,0.0,0.8,B,0.51,B,0.99,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1297.json,Prealgebra,general,A,0.0,1.0,B,0.51,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1655.json,Prealgebra,general,A,1.0,0.85,A,0.5,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1356.json,Prealgebra,general,A,1.0,0.9,A,0.5,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1003.json,Prealgebra,general,A,0.0,1.0,B,0.7,B,0.85,,,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1272.json,Prealgebra,general,B,0.0,1.0,A,0.85,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1113.json,Prealgebra,general,A,1.0,0.8,A,0.9,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1908.json,Prealgebra,general,A,0.0,0.85,B,0.85,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1922.json,Prealgebra,general,B,0.0,0.9,A,0.6,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1907.json,Prealgebra,general,A,1.0,1.0,A,0.6,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/2086.json,Prealgebra,general,Tie,0.5,0.5,B,0.51,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/378.json,Prealgebra,general,B,1.0,0.85,B,0.75,B,0.85,,,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1555.json,Prealgebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1436.json,Prealgebra,general,Tie,0.5,0.5,B,0.75,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1961.json,Prealgebra,general,A,0.0,0.9,B,0.65,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/2057.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/153.json,Prealgebra,general,A,0.0,0.85,B,0.51,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/874.json,Prealgebra,general,Tie,0.5,0.5,B,0.65,A,0.85,,,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1251.json,Prealgebra,general,Tie,0.5,0.5,A,0.6,A,0.98,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1458.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1995.json,Prealgebra,general,Tie,0.5,0.5,B,0.85,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1317.json,Prealgebra,general,Tie,0.5,0.5,B,0.95,B,0.99,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1742.json,Prealgebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/993.json,Prealgebra,general,A,0.0,0.9,B,0.6,A,0.99,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1834.json,Prealgebra,general,B,1.0,0.9,B,0.6,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1512.json,Prealgebra,general,A,1.0,0.9,A,0.51,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/260.json,Prealgebra,general,A,0.0,0.9,B,0.65,B,0.75,A,0.7,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1787.json,Prealgebra,general,B,0.0,0.9,B,0.95,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1044.json,Prealgebra,general,B,0.0,0.9,A,1.0,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/465.json,Prealgebra,general,B,1.0,0.85,B,1.0,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1423.json,Prealgebra,general,A,1.0,1.0,A,0.75,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/954.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1973.json,Prealgebra,general,B,1.0,1.0,B,0.95,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1730.json,Prealgebra,general,A,1.0,0.9,B,0.95,A,0.9,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1238.json,Prealgebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1353.json,Prealgebra,general,A,0.0,0.75,B,0.51,A,0.95,B,0.75,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1187.json,Prealgebra,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1743.json,Prealgebra,general,A,1.0,0.95,B,0.51,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1865.json,Prealgebra,general,B,1.0,0.9,B,0.8,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1298.json,Prealgebra,general,Tie,0.5,0.5,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/2066.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.9,A,0.8,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/631.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.9,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/977.json,Prealgebra,general,A,0.0,1.0,B,0.55,A,1.0,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1991.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1784.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1572.json,Prealgebra,general,B,1.0,1.0,B,0.95,B,0.98,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/65.json,Prealgebra,general,B,1.0,1.0,B,0.55,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1227.json,Prealgebra,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/2019.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1640.json,Prealgebra,general,A,1.0,0.85,A,0.9,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/2037.json,Prealgebra,general,B,1.0,1.0,B,0.9,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/996.json,Prealgebra,general,Tie,0.5,0.5,B,0.55,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/805.json,Prealgebra,general,A,1.0,1.0,A,0.55,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/914.json,Prealgebra,general,A,0.0,0.9,B,1.0,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1114.json,Prealgebra,general,A,0.0,0.85,B,0.7,B,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/846.json,Prealgebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.9,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1930.json,Prealgebra,general,Tie,0.5,0.5,B,0.6,A,0.95,B,0.9,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1252.json,Prealgebra,general,A,1.0,0.9,B,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/prealgebra/1203.json,Prealgebra,general,A,0.0,0.9,B,0.51,B,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/prealgebra/1128.json,Prealgebra,general,B,1.0,1.0,B,0.95,A,0.95,B,0.7,B
Qwen2.5-Math-7B-Instruct,test/geometry/248.json,Geometry,general,Tie,0.5,0.5,B,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/geometry/434.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/geometry/967.json,Geometry,general,A,1.0,0.85,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/627.json,Geometry,general,A,0.0,1.0,B,0.9,A,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/geometry/178.json,Geometry,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/geometry/456.json,Geometry,general,A,1.0,0.8,A,0.51,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/geometry/353.json,Geometry,general,A,0.0,0.8,B,0.6,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/183.json,Geometry,general,B,1.0,0.9,A,0.6,B,0.65,,,A
Qwen2.5-Math-7B-Instruct,test/geometry/283.json,Geometry,general,B,0.0,0.6,A,0.51,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/1140.json,Geometry,general,A,0.0,0.85,B,0.6,B,0.95,B,0.9,B
Qwen2.5-Math-7B-Instruct,test/geometry/172.json,Geometry,general,A,0.0,1.0,A,0.7,B,0.85,,,A
Qwen2.5-Math-7B-Instruct,test/geometry/880.json,Geometry,general,A,1.0,0.9,B,0.6,A,0.7,,,B
Qwen2.5-Math-7B-Instruct,test/geometry/802.json,Geometry,general,B,0.0,0.8,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/65.json,Geometry,general,B,0.0,1.0,A,0.7,A,0.85,A,0.65,B
Qwen2.5-Math-7B-Instruct,test/geometry/702.json,Geometry,general,Tie,0.5,0.5,B,0.7,B,0.9,,,A
Qwen2.5-Math-7B-Instruct,test/geometry/221.json,Geometry,general,A,1.0,1.0,A,0.51,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/geometry/547.json,Geometry,general,A,0.0,0.95,A,0.6,B,0.9,,,A
Qwen2.5-Math-7B-Instruct,test/geometry/229.json,Geometry,general,Tie,0.5,0.5,B,0.98,B,0.9,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/254.json,Geometry,general,B,1.0,0.95,B,0.85,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/geometry/473.json,Geometry,general,A,0.0,0.9,B,0.6,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/geometry/347.json,Geometry,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/483.json,Geometry,general,A,1.0,0.95,A,0.85,A,1.0,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/826.json,Geometry,general,Tie,0.5,0.5,A,0.7,A,0.75,B,0.8,A
Qwen2.5-Math-7B-Instruct,test/geometry/226.json,Geometry,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/geometry/686.json,Geometry,general,B,1.0,1.0,B,0.8,B,0.98,,,B
Qwen2.5-Math-7B-Instruct,test/geometry/1097.json,Geometry,general,Tie,0.5,0.5,B,0.85,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/965.json,Geometry,general,A,1.0,0.9,A,0.95,A,0.9,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/711.json,Geometry,general,A,1.0,0.9,A,0.65,A,0.85,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/geometry/1108.json,Geometry,general,A,1.0,1.0,A,0.55,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/947.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.98,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/geometry/465.json,Geometry,general,Tie,0.5,0.5,B,1.0,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/geometry/73.json,Geometry,general,Tie,0.5,0.5,B,0.51,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/106.json,Geometry,general,B,1.0,0.75,B,0.51,A,0.99,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/846.json,Geometry,general,Tie,0.5,0.5,B,0.95,B,0.95,,,B
Qwen2.5-Math-7B-Instruct,test/geometry/538.json,Geometry,general,B,0.0,0.95,A,0.9,A,0.98,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/geometry/795.json,Geometry,general,A,0.0,0.8,B,0.95,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/geometry/817.json,Geometry,general,A,0.0,0.85,A,0.65,B,0.9,,,B
Qwen2.5-Math-7B-Instruct,test/geometry/843.json,Geometry,general,A,0.0,0.9,B,0.51,B,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/geometry/477.json,Geometry,general,B,0.0,0.95,A,0.51,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/geometry/561.json,Geometry,general,A,0.0,0.9,B,0.65,B,0.85,B,0.8,A
Qwen2.5-Math-7B-Instruct,test/geometry/615.json,Geometry,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/525.json,Counting & Probability,general,A,1.0,1.0,B,0.75,A,0.95,,,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/666.json,Counting & Probability,general,Tie,0.5,0.5,B,0.6,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/134.json,Counting & Probability,general,Tie,0.5,0.5,B,0.6,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/119.json,Counting & Probability,general,B,0.0,0.9,B,0.55,A,1.0,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/1114.json,Counting & Probability,general,A,1.0,0.85,B,0.9,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/377.json,Counting & Probability,general,Tie,0.5,0.5,A,0.55,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/23957.json,Counting & Probability,general,A,1.0,1.0,B,0.55,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/1060.json,Counting & Probability,general,B,1.0,1.0,B,0.85,B,0.9,B,0.9,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/430.json,Counting & Probability,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/159.json,Counting & Probability,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/230.json,Counting & Probability,general,A,1.0,1.0,A,0.7,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/803.json,Counting & Probability,general,B,0.0,1.0,A,0.6,A,0.95,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/181.json,Counting & Probability,general,B,0.0,0.85,B,0.65,A,0.5,A,0.6,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/51.json,Counting & Probability,general,A,0.0,0.85,B,0.55,A,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/508.json,Counting & Probability,general,A,1.0,0.9,B,0.51,A,0.99,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/389.json,Counting & Probability,general,A,0.0,0.8,B,0.95,B,0.95,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/765.json,Counting & Probability,general,Tie,0.5,0.5,B,0.85,B,0.95,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/282.json,Counting & Probability,general,A,0.0,1.0,B,0.6,A,0.5,,,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/71.json,Counting & Probability,general,A,1.0,0.85,A,0.7,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/894.json,Counting & Probability,general,A,0.0,0.9,B,0.95,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/1009.json,Counting & Probability,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/913.json,Counting & Probability,general,B,1.0,0.85,B,0.85,B,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/25149.json,Counting & Probability,general,A,1.0,0.9,A,0.51,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/339.json,Counting & Probability,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/870.json,Counting & Probability,general,A,1.0,1.0,A,0.85,B,0.95,A,0.95,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/216.json,Counting & Probability,general,B,0.0,0.95,B,0.51,A,0.99,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/737.json,Counting & Probability,general,Tie,0.5,0.5,B,0.85,A,0.99,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/116.json,Counting & Probability,general,A,0.0,0.85,B,0.95,A,1.0,B,0.95,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/238.json,Counting & Probability,general,A,1.0,1.0,A,0.95,A,1.0,A,1.0,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/1014.json,Counting & Probability,general,A,0.0,0.75,B,0.95,A,1.0,B,0.95,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/14.json,Counting & Probability,general,A,0.0,0.85,B,0.95,A,0.95,B,1.0,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/188.json,Counting & Probability,general,A,1.0,1.0,A,0.75,A,0.7,,,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/761.json,Counting & Probability,general,A,1.0,0.9,A,0.55,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/10.json,Counting & Probability,general,A,1.0,0.85,A,0.95,A,0.95,A,1.0,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/731.json,Counting & Probability,general,B,1.0,1.0,B,0.95,B,0.8,B,0.9,A
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/190.json,Counting & Probability,general,B,0.0,0.9,A,0.51,A,0.95,A,0.95,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/1003.json,Counting & Probability,general,Tie,0.5,0.5,B,1.0,B,0.98,B,1.0,B
Qwen2.5-Math-7B-Instruct,test/counting_and_probability/199.json,Counting & Probability,general,B,0.0,1.0,A,0.5,A,1.0,A,1.0,B
Llama-3.1-8B-Instruct,test/precalculus/807.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/precalculus/927.json,Precalculus,general,A,1.0,0.95,A,0.6,A,0.85,,,A
Llama-3.1-8B-Instruct,test/precalculus/1303.json,Precalculus,general,A,0.0,0.9,B,0.6,B,0.85,B,0.7,B
Llama-3.1-8B-Instruct,test/precalculus/990.json,Precalculus,general,A,1.0,0.9,A,0.85,B,0.95,A,0.8,B
Llama-3.1-8B-Instruct,test/precalculus/1199.json,Precalculus,general,A,0.0,0.9,B,0.65,B,0.85,,,A
Llama-3.1-8B-Instruct,test/precalculus/779.json,Precalculus,general,A,1.0,0.9,A,0.95,A,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/precalculus/285.json,Precalculus,general,A,1.0,0.9,A,0.75,A,0.9,A,0.9,A
Llama-3.1-8B-Instruct,test/precalculus/1105.json,Precalculus,general,A,0.0,0.95,B,0.75,B,0.7,B,0.6,A
Llama-3.1-8B-Instruct,test/precalculus/675.json,Precalculus,general,B,1.0,0.8,A,0.6,B,0.7,,,A
Llama-3.1-8B-Instruct,test/precalculus/1146.json,Precalculus,general,B,1.0,0.9,B,0.85,B,0.9,B,0.8,B
Llama-3.1-8B-Instruct,test/precalculus/1313.json,Precalculus,general,B,1.0,0.9,B,0.7,B,0.85,,,B
Llama-3.1-8B-Instruct,test/precalculus/24313.json,Precalculus,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/precalculus/34.json,Precalculus,general,A,1.0,0.9,A,1.0,A,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/precalculus/1300.json,Precalculus,general,A,0.0,0.6,B,0.75,B,0.95,,,A
Llama-3.1-8B-Instruct,test/precalculus/44.json,Precalculus,general,A,1.0,0.9,A,0.65,A,0.8,,,B
Llama-3.1-8B-Instruct,test/precalculus/477.json,Precalculus,general,A,1.0,0.9,B,0.85,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/precalculus/43.json,Precalculus,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/precalculus/986.json,Precalculus,general,A,1.0,0.9,A,0.85,A,0.85,A,0.7,B
Llama-3.1-8B-Instruct,test/precalculus/117.json,Precalculus,general,B,1.0,0.9,B,0.75,B,0.6,,,A
Llama-3.1-8B-Instruct,test/precalculus/697.json,Precalculus,general,B,0.0,0.8,A,0.65,A,0.85,A,0.7,A
Llama-3.1-8B-Instruct,test/precalculus/659.json,Precalculus,general,A,1.0,0.9,A,1.0,A,0.99,A,1.0,A
Llama-3.1-8B-Instruct,test/precalculus/263.json,Precalculus,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/precalculus/541.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/precalculus/190.json,Precalculus,general,A,1.0,0.9,A,0.65,B,0.95,A,0.7,A
Llama-3.1-8B-Instruct,test/precalculus/819.json,Precalculus,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/precalculus/1056.json,Precalculus,general,A,0.0,0.9,B,0.7,B,0.9,,,B
Llama-3.1-8B-Instruct,test/precalculus/441.json,Precalculus,general,A,0.0,0.8,B,0.85,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/precalculus/989.json,Precalculus,general,A,0.0,0.95,A,0.55,B,0.7,B,0.75,A
Llama-3.1-8B-Instruct,test/precalculus/920.json,Precalculus,general,A,1.0,0.95,A,0.75,A,0.7,A,0.6,B
Llama-3.1-8B-Instruct,test/precalculus/452.json,Precalculus,general,A,0.0,0.9,B,0.95,B,0.95,B,0.9,A
Llama-3.1-8B-Instruct,test/precalculus/580.json,Precalculus,general,A,0.0,0.9,B,0.85,B,0.9,,,B
Llama-3.1-8B-Instruct,test/precalculus/768.json,Precalculus,general,A,0.0,0.8,B,0.75,B,0.9,,,A
Llama-3.1-8B-Instruct,test/precalculus/1172.json,Precalculus,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,B
Llama-3.1-8B-Instruct,test/precalculus/1201.json,Precalculus,general,A,1.0,0.95,A,0.6,A,0.7,,,A
Llama-3.1-8B-Instruct,test/precalculus/881.json,Precalculus,general,A,1.0,0.9,A,0.85,B,0.9,A,0.65,B
Llama-3.1-8B-Instruct,test/precalculus/695.json,Precalculus,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/precalculus/742.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.99,B,1.0,A
Llama-3.1-8B-Instruct,test/precalculus/801.json,Precalculus,general,B,0.0,0.95,B,0.75,A,0.65,A,0.7,B
Llama-3.1-8B-Instruct,test/precalculus/826.json,Precalculus,general,B,1.0,0.95,A,0.65,B,0.75,,,B
Llama-3.1-8B-Instruct,test/precalculus/1281.json,Precalculus,general,B,0.0,0.9,B,0.65,A,0.6,A,0.9,B
Llama-3.1-8B-Instruct,test/precalculus/96.json,Precalculus,general,A,1.0,0.9,A,0.8,A,0.95,A,0.85,B
Llama-3.1-8B-Instruct,test/precalculus/1289.json,Precalculus,general,B,0.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/precalculus/902.json,Precalculus,general,B,1.0,0.8,B,0.75,B,0.5,,,B
Llama-3.1-8B-Instruct,test/precalculus/1291.json,Precalculus,general,B,1.0,0.8,A,0.85,B,0.85,B,0.8,B
Llama-3.1-8B-Instruct,test/precalculus/398.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/precalculus/681.json,Precalculus,general,B,1.0,0.95,B,0.85,B,0.85,,,B
Llama-3.1-8B-Instruct,test/precalculus/145.json,Precalculus,general,A,0.0,0.8,B,0.7,B,0.9,,,A
Llama-3.1-8B-Instruct,test/precalculus/625.json,Precalculus,general,A,1.0,0.95,A,0.75,A,0.95,A,0.9,A
Llama-3.1-8B-Instruct,test/precalculus/1202.json,Precalculus,general,A,0.0,0.95,A,0.75,B,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/precalculus/1133.json,Precalculus,general,A,0.0,0.8,B,0.6,B,0.9,,,A
Llama-3.1-8B-Instruct,test/precalculus/499.json,Precalculus,general,A,0.0,1.0,B,0.65,B,0.7,A,0.7,B
Llama-3.1-8B-Instruct,test/precalculus/323.json,Precalculus,general,A,0.0,1.0,B,0.65,B,0.85,A,0.7,B
Llama-3.1-8B-Instruct,test/precalculus/703.json,Precalculus,general,A,1.0,0.95,A,0.65,A,0.9,,,B
Llama-3.1-8B-Instruct,test/precalculus/1252.json,Precalculus,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/precalculus/1082.json,Precalculus,general,A,0.0,0.9,A,0.95,B,0.7,B,0.6,B
Llama-3.1-8B-Instruct,test/precalculus/356.json,Precalculus,general,A,1.0,0.8,A,0.95,A,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1994.json,Intermediate Algebra,general,A,0.0,0.9,A,0.9,B,0.7,B,0.8,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1197.json,Intermediate Algebra,general,A,0.0,0.9,B,0.7,B,0.95,A,0.8,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/134.json,Intermediate Algebra,general,A,1.0,1.0,A,1.0,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1000.json,Intermediate Algebra,general,A,0.0,0.9,B,0.98,B,0.95,B,1.0,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/607.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.7,B,0.7,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1388.json,Intermediate Algebra,general,A,1.0,0.9,A,0.7,B,0.85,A,0.6,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/428.json,Intermediate Algebra,general,A,0.0,0.8,B,0.95,B,0.98,B,0.9,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1454.json,Intermediate Algebra,general,B,0.0,0.85,A,0.6,A,0.6,A,0.7,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1217.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.98,A,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1168.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,1.0,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/956.json,Intermediate Algebra,general,A,0.0,0.9,B,0.6,B,0.5,A,0.7,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,0.0,0.9,A,0.65,B,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/279.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.8,B,0.9,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/207.json,Intermediate Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.9,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/623.json,Intermediate Algebra,general,B,1.0,0.9,B,0.95,B,0.9,B,0.9,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/47.json,Intermediate Algebra,general,A,0.0,0.9,B,0.98,B,0.99,B,1.0,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1849.json,Intermediate Algebra,general,A,1.0,0.8,A,0.65,A,0.85,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/2046.json,Intermediate Algebra,general,A,1.0,0.8,B,0.6,A,0.7,A,0.7,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/662.json,Intermediate Algebra,general,B,1.0,0.6,B,0.75,B,0.9,B,0.75,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/582.json,Intermediate Algebra,general,B,1.0,0.9,B,0.7,A,0.5,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/431.json,Intermediate Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/558.json,Intermediate Algebra,general,B,1.0,0.7,B,0.65,B,0.85,B,0.9,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/362.json,Intermediate Algebra,general,B,0.0,0.95,A,1.0,A,0.98,A,0.95,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/515.json,Intermediate Algebra,general,A,1.0,0.9,A,0.98,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/894.json,Intermediate Algebra,general,B,1.0,0.8,B,0.7,B,0.7,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/345.json,Intermediate Algebra,general,B,1.0,0.95,B,0.7,B,0.6,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1898.json,Intermediate Algebra,general,A,0.0,0.9,B,1.0,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/232.json,Intermediate Algebra,general,A,1.0,0.9,A,0.9,A,0.5,A,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/128.json,Intermediate Algebra,general,A,0.0,0.9,A,0.75,B,0.95,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1063.json,Intermediate Algebra,general,A,0.0,0.8,B,0.65,B,0.95,A,0.65,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1126.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.7,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/2022.json,Intermediate Algebra,general,A,1.0,0.9,A,0.6,B,0.95,A,0.6,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1151.json,Intermediate Algebra,general,B,1.0,0.9,B,0.9,B,0.95,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,0.0,0.9,B,0.6,B,0.7,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/966.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.65,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/964.json,Intermediate Algebra,general,A,1.0,0.9,A,0.75,A,0.75,A,0.7,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1410.json,Intermediate Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/991.json,Intermediate Algebra,general,B,0.0,0.95,A,0.65,B,0.8,A,0.7,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/183.json,Intermediate Algebra,general,A,0.0,0.9,B,0.6,B,0.85,A,0.6,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1422.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.85,B,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/2196.json,Intermediate Algebra,general,A,1.0,0.9,A,0.85,B,0.6,A,0.7,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/591.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.95,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1555.json,Intermediate Algebra,general,B,0.0,0.95,A,0.98,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1510.json,Intermediate Algebra,general,A,0.0,1.0,B,0.6,B,0.7,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/102.json,Intermediate Algebra,general,B,1.0,0.9,B,0.98,B,0.98,B,1.0,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/986.json,Intermediate Algebra,general,A,0.0,0.9,B,0.7,B,0.85,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1354.json,Intermediate Algebra,general,A,0.0,0.8,B,0.75,B,0.95,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1837.json,Intermediate Algebra,general,B,0.0,0.9,A,0.75,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/337.json,Intermediate Algebra,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1210.json,Intermediate Algebra,general,B,1.0,0.8,B,0.75,A,0.85,B,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1123.json,Intermediate Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/149.json,Intermediate Algebra,general,A,0.0,0.8,A,0.95,B,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1411.json,Intermediate Algebra,general,B,1.0,0.8,B,0.6,B,0.8,A,0.7,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/960.json,Intermediate Algebra,general,A,1.0,0.9,B,0.6,A,0.6,A,0.4,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1300.json,Intermediate Algebra,general,A,0.0,0.85,B,0.85,A,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/90.json,Intermediate Algebra,general,A,0.0,0.9,B,0.6,B,0.95,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/754.json,Intermediate Algebra,general,B,1.0,0.9,B,0.6,B,0.5,A,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/446.json,Intermediate Algebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1544.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,B,0.85,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1714.json,Intermediate Algebra,general,A,1.0,0.8,A,1.0,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/2152.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/117.json,Intermediate Algebra,general,A,0.0,0.9,B,0.9,B,0.75,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/190.json,Intermediate Algebra,general,B,1.0,0.9,B,0.7,B,0.6,B,0.6,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/776.json,Intermediate Algebra,general,A,0.0,0.8,B,0.6,A,0.4,B,0.6,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1566.json,Intermediate Algebra,general,A,1.0,0.9,A,0.7,A,0.85,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1572.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1166.json,Intermediate Algebra,general,B,0.0,0.95,B,0.6,A,0.6,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/860.json,Intermediate Algebra,general,B,1.0,0.9,B,0.8,B,0.85,A,0.7,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1407.json,Intermediate Algebra,general,A,0.0,0.9,B,0.65,B,0.9,B,0.8,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1405.json,Intermediate Algebra,general,A,1.0,0.95,A,0.98,A,0.95,A,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/690.json,Intermediate Algebra,general,A,0.0,0.9,A,0.7,B,0.8,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/955.json,Intermediate Algebra,general,A,1.0,0.9,B,0.7,A,0.5,A,0.7,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1992.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.85,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1111.json,Intermediate Algebra,general,B,1.0,0.8,B,0.85,B,0.95,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1791.json,Intermediate Algebra,general,B,1.0,0.8,B,0.65,B,0.95,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1806.json,Intermediate Algebra,general,B,0.0,0.8,A,0.65,B,0.6,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1797.json,Intermediate Algebra,general,B,1.0,0.9,B,0.75,B,0.6,A,0.65,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/2146.json,Intermediate Algebra,general,A,0.0,0.9,B,1.0,B,0.99,B,1.0,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/2015.json,Intermediate Algebra,general,A,1.0,0.9,B,0.7,A,0.9,A,0.6,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/121.json,Intermediate Algebra,general,A,1.0,0.9,A,0.85,B,0.7,A,0.3,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1014.json,Intermediate Algebra,general,A,1.0,0.95,A,0.7,B,0.9,A,0.6,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1462.json,Intermediate Algebra,general,B,1.0,0.9,B,0.85,B,0.9,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/199.json,Intermediate Algebra,general,B,0.0,0.9,A,0.85,A,0.9,A,0.95,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1779.json,Intermediate Algebra,general,A,0.0,1.0,B,0.6,B,0.75,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1102.json,Intermediate Algebra,general,B,0.0,0.9,A,1.0,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/834.json,Intermediate Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.9,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/158.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.6,A,0.6,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/752.json,Intermediate Algebra,general,B,1.0,0.8,B,0.7,B,0.85,B,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1279.json,Intermediate Algebra,general,A,0.0,0.9,B,0.75,B,0.65,,,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1467.json,Intermediate Algebra,general,B,1.0,0.8,B,0.65,B,0.95,,,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/101.json,Intermediate Algebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1365.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.85,A,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1350.json,Intermediate Algebra,general,B,0.0,0.9,B,0.65,A,0.7,A,0.8,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1930.json,Intermediate Algebra,general,A,1.0,0.9,A,0.6,B,0.85,A,0.7,B
Llama-3.1-8B-Instruct,test/intermediate_algebra/1981.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.85,B,0.8,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1232.json,Intermediate Algebra,general,B,1.0,0.9,B,0.9,B,0.99,B,0.9,A
Llama-3.1-8B-Instruct,test/intermediate_algebra/1508.json,Intermediate Algebra,general,A,0.0,0.9,B,0.75,B,0.9,,,A
Llama-3.1-8B-Instruct,test/algebra/2584.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/1349.json,Algebra,general,B,0.0,0.9,A,1.0,A,1.0,A,1.0,B
Llama-3.1-8B-Instruct,test/algebra/2036.json,Algebra,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/1098.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/1837.json,Algebra,general,B,1.0,0.9,B,0.75,B,0.9,B,0.8,B
Llama-3.1-8B-Instruct,test/algebra/2193.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2427.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.7,,,A
Llama-3.1-8B-Instruct,test/algebra/1072.json,Algebra,general,A,0.0,1.0,B,0.75,A,0.85,B,0.7,A
Llama-3.1-8B-Instruct,test/algebra/24.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/2214.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/305.json,Algebra,general,A,1.0,0.8,A,0.85,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/1265.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/187.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/769.json,Algebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,B
Llama-3.1-8B-Instruct,test/algebra/722.json,Algebra,general,B,0.0,1.0,A,0.6,B,0.9,A,0.65,A
Llama-3.1-8B-Instruct,test/algebra/2046.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,1.0,A
Llama-3.1-8B-Instruct,test/algebra/2253.json,Algebra,general,A,1.0,0.9,A,0.9,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/1004.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/algebra/1035.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/2700.json,Algebra,general,A,0.0,0.9,B,0.6,B,0.7,A,0.65,B
Llama-3.1-8B-Instruct,test/algebra/893.json,Algebra,general,B,1.0,0.9,B,1.0,B,0.98,B,1.0,A
Llama-3.1-8B-Instruct,test/algebra/567.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/892.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/2023.json,Algebra,general,A,0.0,0.8,B,0.95,B,0.9,B,0.7,B
Llama-3.1-8B-Instruct,test/algebra/873.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.95,B,1.0,A
Llama-3.1-8B-Instruct,test/algebra/2058.json,Algebra,general,B,0.0,0.9,A,0.85,A,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/algebra/2593.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.8,A
Llama-3.1-8B-Instruct,test/algebra/2157.json,Algebra,general,B,0.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/2251.json,Algebra,general,A,0.0,0.8,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/1332.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/972.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/2232.json,Algebra,general,B,0.0,0.9,A,1.0,A,1.0,A,1.0,B
Llama-3.1-8B-Instruct,test/algebra/661.json,Algebra,general,A,0.0,0.9,B,0.95,A,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/246.json,Algebra,general,B,1.0,0.9,B,0.7,B,0.85,,,B
Llama-3.1-8B-Instruct,test/algebra/1519.json,Algebra,general,B,0.0,0.8,A,0.95,A,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/algebra/988.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2570.json,Algebra,general,A,1.0,0.8,A,1.0,A,0.99,A,1.0,B
Llama-3.1-8B-Instruct,test/algebra/621.json,Algebra,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,A
Llama-3.1-8B-Instruct,test/algebra/1255.json,Algebra,general,A,1.0,0.9,A,0.9,B,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2517.json,Algebra,general,B,0.0,1.0,B,0.85,A,0.85,A,0.8,B
Llama-3.1-8B-Instruct,test/algebra/478.json,Algebra,general,A,1.0,0.9,A,1.0,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/297.json,Algebra,general,B,1.0,0.9,B,0.85,B,0.75,B,0.75,A
Llama-3.1-8B-Instruct,test/algebra/841.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,A,0.9,B
Llama-3.1-8B-Instruct,test/algebra/686.json,Algebra,general,B,1.0,0.9,B,1.0,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/351.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/1275.json,Algebra,general,A,0.0,0.6,B,0.85,B,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/algebra/1082.json,Algebra,general,B,0.0,0.8,A,0.85,A,0.95,B,0.85,A
Llama-3.1-8B-Instruct,test/algebra/1214.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2199.json,Algebra,general,B,1.0,0.9,B,1.0,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/733.json,Algebra,general,A,1.0,0.8,A,0.65,A,0.98,,,B
Llama-3.1-8B-Instruct,test/algebra/109.json,Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/1937.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/291.json,Algebra,general,B,1.0,0.4,B,0.7,B,0.85,,,B
Llama-3.1-8B-Instruct,test/algebra/2102.json,Algebra,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/907.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/864.json,Algebra,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/2159.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/1578.json,Algebra,general,A,1.0,0.8,A,0.85,A,0.7,A,0.7,A
Llama-3.1-8B-Instruct,test/algebra/975.json,Algebra,general,A,0.0,0.8,B,0.75,B,0.9,B,0.6,A
Llama-3.1-8B-Instruct,test/algebra/1143.json,Algebra,general,B,1.0,0.9,B,0.65,B,0.85,A,0.8,B
Llama-3.1-8B-Instruct,test/algebra/2626.json,Algebra,general,A,0.0,0.9,B,0.65,B,0.9,A,0.7,A
Llama-3.1-8B-Instruct,test/algebra/1787.json,Algebra,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/1934.json,Algebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/2064.json,Algebra,general,A,1.0,0.6,A,0.95,A,0.95,A,1.0,B
Llama-3.1-8B-Instruct,test/algebra/694.json,Algebra,general,A,1.0,0.9,A,0.85,B,1.0,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/524.json,Algebra,general,A,1.0,0.9,A,0.98,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2551.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/algebra/346.json,Algebra,general,A,1.0,0.9,A,0.9,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/1282.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.9,B,0.8,B
Llama-3.1-8B-Instruct,test/algebra/1184.json,Algebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/634.json,Algebra,general,B,1.0,0.8,B,0.98,B,1.0,B,1.0,A
Llama-3.1-8B-Instruct,test/algebra/2486.json,Algebra,general,B,1.0,0.9,B,0.85,A,0.95,B,0.9,B
Llama-3.1-8B-Instruct,test/algebra/2257.json,Algebra,general,B,1.0,1.0,B,1.0,B,0.98,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/1842.json,Algebra,general,B,1.0,0.9,B,1.0,B,0.99,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/791.json,Algebra,general,B,0.0,1.0,B,0.7,A,0.65,A,0.8,B
Llama-3.1-8B-Instruct,test/algebra/276.json,Algebra,general,A,0.0,0.8,B,1.0,B,1.0,B,1.0,A
Llama-3.1-8B-Instruct,test/algebra/2735.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/425.json,Algebra,general,B,0.0,0.8,A,0.85,A,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/1936.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/2176.json,Algebra,general,B,0.0,0.8,A,0.95,B,0.85,A,0.95,B
Llama-3.1-8B-Instruct,test/algebra/509.json,Algebra,general,A,0.0,0.8,B,0.85,B,0.85,,,A
Llama-3.1-8B-Instruct,test/algebra/1457.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.9,B,0.9,A
Llama-3.1-8B-Instruct,test/algebra/2592.json,Algebra,general,B,0.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/858.json,Algebra,general,B,1.0,0.9,B,0.8,B,0.85,B,0.8,B
Llama-3.1-8B-Instruct,test/algebra/1529.json,Algebra,general,B,1.0,0.8,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/1338.json,Algebra,general,A,1.0,0.95,A,0.8,B,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/algebra/1547.json,Algebra,general,B,1.0,0.8,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/529.json,Algebra,general,A,1.0,0.9,B,0.7,A,1.0,,,A
Llama-3.1-8B-Instruct,test/algebra/1078.json,Algebra,general,A,0.0,0.9,B,0.7,B,0.7,,,A
Llama-3.1-8B-Instruct,test/algebra/251.json,Algebra,general,A,1.0,0.9,A,0.9,A,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/algebra/1199.json,Algebra,general,A,0.0,0.9,A,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/2264.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/1303.json,Algebra,general,A,0.0,0.9,B,0.98,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/101.json,Algebra,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/170.json,Algebra,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/849.json,Algebra,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/1031.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,B
Llama-3.1-8B-Instruct,test/algebra/853.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.85,,,B
Llama-3.1-8B-Instruct,test/algebra/2277.json,Algebra,general,B,1.0,1.0,B,0.75,B,0.85,B,0.7,B
Llama-3.1-8B-Instruct,test/algebra/518.json,Algebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/114.json,Algebra,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/1960.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/2680.json,Algebra,general,B,1.0,0.9,B,1.0,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/2391.json,Algebra,general,A,0.0,0.95,B,0.65,B,0.75,B,0.65,A
Llama-3.1-8B-Instruct,test/algebra/776.json,Algebra,general,A,1.0,0.8,A,0.95,A,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/algebra/1796.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/1339.json,Algebra,general,B,0.0,0.9,A,0.6,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2743.json,Algebra,general,A,1.0,0.9,B,0.95,A,0.95,A,1.0,B
Llama-3.1-8B-Instruct,test/algebra/2043.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.9,A,0.8,A
Llama-3.1-8B-Instruct,test/algebra/1553.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2080.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/algebra/1343.json,Algebra,general,A,1.0,0.9,A,0.9,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/668.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/algebra/2430.json,Algebra,general,B,1.0,0.9,B,0.75,B,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/algebra/2789.json,Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/1814.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2476.json,Algebra,general,A,1.0,0.9,A,0.85,B,0.85,A,1.0,B
Llama-3.1-8B-Instruct,test/algebra/2780.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/algebra/824.json,Algebra,general,B,1.0,0.8,B,1.0,B,0.98,B,1.0,A
Llama-3.1-8B-Instruct,test/algebra/1425.json,Algebra,general,B,0.0,0.85,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/224.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.9,A,0.8,A
Llama-3.1-8B-Instruct,test/algebra/435.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2470.json,Algebra,general,A,1.0,0.95,A,0.8,A,0.85,B,0.95,A
Llama-3.1-8B-Instruct,test/algebra/2779.json,Algebra,general,B,1.0,0.8,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/572.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/number_theory/515.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/number_theory/1032.json,Number Theory,general,A,0.0,0.9,B,0.75,B,0.85,B,0.8,A
Llama-3.1-8B-Instruct,test/number_theory/737.json,Number Theory,general,B,0.0,0.8,A,1.0,A,1.0,A,0.95,A
Llama-3.1-8B-Instruct,test/number_theory/864.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.9,B,0.7,A
Llama-3.1-8B-Instruct,test/number_theory/627.json,Number Theory,general,A,0.0,0.9,B,0.85,B,0.95,,,A
Llama-3.1-8B-Instruct,test/number_theory/45.json,Number Theory,general,A,0.0,0.9,B,0.85,B,0.95,A,0.9,B
Llama-3.1-8B-Instruct,test/number_theory/1055.json,Number Theory,general,A,0.0,0.9,B,0.85,B,0.85,A,0.7,A
Llama-3.1-8B-Instruct,test/number_theory/46.json,Number Theory,general,A,0.0,0.8,B,0.75,A,0.95,B,0.9,B
Llama-3.1-8B-Instruct,test/number_theory/516.json,Number Theory,general,A,1.0,0.9,A,0.7,A,0.7,,,B
Llama-3.1-8B-Instruct,test/number_theory/357.json,Number Theory,general,A,0.0,0.9,B,0.95,B,0.95,B,0.9,B
Llama-3.1-8B-Instruct,test/number_theory/914.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.98,A,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/847.json,Number Theory,general,B,1.0,0.9,B,0.95,B,0.95,B,0.8,B
Llama-3.1-8B-Instruct,test/number_theory/753.json,Number Theory,general,A,0.0,0.95,B,0.75,B,0.7,A,0.8,B
Llama-3.1-8B-Instruct,test/number_theory/1257.json,Number Theory,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/156.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/number_theory/612.json,Number Theory,general,B,0.0,0.8,A,0.95,A,0.95,A,0.9,A
Llama-3.1-8B-Instruct,test/number_theory/931.json,Number Theory,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/521.json,Number Theory,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/number_theory/598.json,Number Theory,general,B,1.0,0.9,B,0.6,B,0.85,A,0.8,B
Llama-3.1-8B-Instruct,test/number_theory/978.json,Number Theory,general,A,1.0,0.9,A,0.85,A,0.85,A,0.7,A
Llama-3.1-8B-Instruct,test/number_theory/838.json,Number Theory,general,A,0.0,0.9,B,0.85,B,0.85,B,0.7,B
Llama-3.1-8B-Instruct,test/number_theory/149.json,Number Theory,general,B,1.0,0.8,B,1.0,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/1201.json,Number Theory,general,A,0.0,0.8,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/234.json,Number Theory,general,A,1.0,0.8,A,0.75,A,0.8,B,0.6,A
Llama-3.1-8B-Instruct,test/number_theory/417.json,Number Theory,general,A,0.0,0.85,B,0.65,B,0.98,B,0.7,B
Llama-3.1-8B-Instruct,test/number_theory/89.json,Number Theory,general,A,0.0,0.9,B,1.0,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/183.json,Number Theory,general,A,1.0,0.8,A,1.0,A,0.95,A,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/1065.json,Number Theory,general,B,1.0,0.9,B,0.65,B,0.95,B,0.7,A
Llama-3.1-8B-Instruct,test/number_theory/466.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/number_theory/634.json,Number Theory,general,B,1.0,0.9,B,0.95,B,0.85,B,0.7,B
Llama-3.1-8B-Instruct,test/number_theory/533.json,Number Theory,general,A,0.0,0.9,B,0.6,B,0.95,B,0.7,A
Llama-3.1-8B-Instruct,test/number_theory/691.json,Number Theory,general,A,1.0,0.9,A,0.6,A,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/number_theory/1287.json,Number Theory,general,A,1.0,0.9,A,0.7,A,0.9,A,0.9,A
Llama-3.1-8B-Instruct,test/number_theory/631.json,Number Theory,general,A,1.0,0.95,A,0.75,B,0.85,A,0.7,B
Llama-3.1-8B-Instruct,test/number_theory/488.json,Number Theory,general,B,0.0,0.9,A,0.95,B,0.9,A,0.9,B
Llama-3.1-8B-Instruct,test/number_theory/1172.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.98,A,0.8,B
Llama-3.1-8B-Instruct,test/number_theory/203.json,Number Theory,general,B,0.0,1.0,A,0.85,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/number_theory/911.json,Number Theory,general,B,1.0,0.9,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/number_theory/483.json,Number Theory,general,A,0.0,0.9,B,0.95,B,0.85,B,0.75,B
Llama-3.1-8B-Instruct,test/number_theory/368.json,Number Theory,general,A,0.0,0.9,B,0.75,B,0.95,B,0.8,A
Llama-3.1-8B-Instruct,test/number_theory/686.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/number_theory/820.json,Number Theory,general,B,1.0,0.9,B,0.7,B,0.85,B,0.7,B
Llama-3.1-8B-Instruct,test/number_theory/109.json,Number Theory,general,A,0.0,0.9,B,0.75,B,0.9,,,A
Llama-3.1-8B-Instruct,test/number_theory/427.json,Number Theory,general,A,0.0,0.8,B,0.95,B,0.95,B,1.0,A
Llama-3.1-8B-Instruct,test/number_theory/1185.json,Number Theory,general,B,0.0,0.95,A,0.85,A,0.9,A,0.95,A
Llama-3.1-8B-Instruct,test/number_theory/928.json,Number Theory,general,A,0.0,1.0,B,0.85,A,0.95,B,0.7,B
Llama-3.1-8B-Instruct,test/number_theory/132.json,Number Theory,general,A,1.0,0.8,A,1.0,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/number_theory/769.json,Number Theory,general,A,0.0,0.95,B,0.6,B,1.0,,,B
Llama-3.1-8B-Instruct,test/number_theory/1002.json,Number Theory,general,B,1.0,0.9,B,0.75,B,0.65,A,0.7,A
Llama-3.1-8B-Instruct,test/number_theory/410.json,Number Theory,general,B,1.0,0.9,B,0.95,B,0.98,B,1.0,A
Llama-3.1-8B-Instruct,test/number_theory/255.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/1000.json,Number Theory,general,B,1.0,0.95,B,0.7,B,0.85,,,A
Llama-3.1-8B-Instruct,test/number_theory/13.json,Number Theory,general,B,1.0,0.8,B,0.9,B,0.95,B,0.9,B
Llama-3.1-8B-Instruct,test/number_theory/459.json,Number Theory,general,B,1.0,0.9,B,0.85,B,0.95,B,0.7,B
Llama-3.1-8B-Instruct,test/number_theory/342.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/number_theory/679.json,Number Theory,general,B,1.0,0.9,B,0.95,B,1.0,B,1.0,A
Llama-3.1-8B-Instruct,test/number_theory/72.json,Number Theory,general,B,0.0,0.8,A,1.0,A,0.95,A,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/22.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/number_theory/1128.json,Number Theory,general,B,1.0,0.9,B,1.0,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/number_theory/1090.json,Number Theory,general,A,1.0,0.95,A,0.95,B,0.9,A,0.7,B
Llama-3.1-8B-Instruct,test/number_theory/239.json,Number Theory,general,A,0.0,0.9,B,0.85,A,0.95,B,0.9,B
Llama-3.1-8B-Instruct,test/prealgebra/1622.json,Prealgebra,general,B,1.0,0.8,B,0.85,B,0.75,B,0.7,B
Llama-3.1-8B-Instruct,test/prealgebra/1139.json,Prealgebra,general,A,1.0,0.9,A,0.85,A,0.85,,,B
Llama-3.1-8B-Instruct,test/prealgebra/1840.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/1302.json,Prealgebra,general,A,0.0,0.9,A,0.9,B,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/930.json,Prealgebra,general,B,1.0,0.9,B,1.0,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1558.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.9,B
Llama-3.1-8B-Instruct,test/prealgebra/1388.json,Prealgebra,general,A,0.0,0.9,B,0.9,A,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/951.json,Prealgebra,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/572.json,Prealgebra,general,B,1.0,0.9,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/1247.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/1747.json,Prealgebra,general,A,0.0,0.95,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1233.json,Prealgebra,general,A,0.0,0.9,B,0.55,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/192.json,Prealgebra,general,A,0.0,0.9,B,0.9,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/307.json,Prealgebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/1761.json,Prealgebra,general,A,0.0,0.9,B,0.95,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1646.json,Prealgebra,general,B,0.0,0.95,A,0.7,A,0.75,A,0.7,B
Llama-3.1-8B-Instruct,test/prealgebra/105.json,Prealgebra,general,A,0.0,0.95,B,1.0,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1924.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1804.json,Prealgebra,general,A,0.0,0.9,B,0.85,B,0.85,B,0.8,B
Llama-3.1-8B-Instruct,test/prealgebra/1733.json,Prealgebra,general,A,1.0,0.9,A,0.8,A,0.7,A,0.8,A
Llama-3.1-8B-Instruct,test/prealgebra/505.json,Prealgebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/1686.json,Prealgebra,general,B,1.0,0.9,B,0.95,A,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1807.json,Prealgebra,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1297.json,Prealgebra,general,B,0.0,0.8,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1655.json,Prealgebra,general,A,1.0,0.9,B,0.85,A,0.9,,,A
Llama-3.1-8B-Instruct,test/prealgebra/1356.json,Prealgebra,general,A,0.0,0.9,B,0.98,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1003.json,Prealgebra,general,B,1.0,0.9,B,0.65,B,0.9,B,0.7,B
Llama-3.1-8B-Instruct,test/prealgebra/1272.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1113.json,Prealgebra,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1908.json,Prealgebra,general,A,1.0,0.9,A,0.65,A,0.75,A,0.7,A
Llama-3.1-8B-Instruct,test/prealgebra/1922.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1907.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/2086.json,Prealgebra,general,A,0.0,0.9,B,0.7,B,0.9,B,0.9,B
Llama-3.1-8B-Instruct,test/prealgebra/378.json,Prealgebra,general,A,0.0,0.95,B,0.65,A,0.7,B,0.7,A
Llama-3.1-8B-Instruct,test/prealgebra/1555.json,Prealgebra,general,A,0.0,0.9,B,0.9,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1436.json,Prealgebra,general,B,0.0,0.9,A,0.85,A,0.9,A,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1961.json,Prealgebra,general,A,1.0,0.8,A,1.0,A,0.95,A,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/2057.json,Prealgebra,general,A,1.0,0.8,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/153.json,Prealgebra,general,A,0.0,0.9,B,0.75,B,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/874.json,Prealgebra,general,A,0.0,0.95,B,0.75,B,0.6,,,B
Llama-3.1-8B-Instruct,test/prealgebra/1251.json,Prealgebra,general,A,0.0,0.8,B,0.65,A,0.6,,,B
Llama-3.1-8B-Instruct,test/prealgebra/1458.json,Prealgebra,general,B,1.0,0.8,B,0.85,B,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/1995.json,Prealgebra,general,B,0.0,0.9,A,0.85,A,0.85,A,0.7,B
Llama-3.1-8B-Instruct,test/prealgebra/1317.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/1742.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/993.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1834.json,Prealgebra,general,B,1.0,0.9,B,0.6,B,0.9,A,0.7,B
Llama-3.1-8B-Instruct,test/prealgebra/1512.json,Prealgebra,general,B,1.0,0.95,B,0.8,B,0.85,,,B
Llama-3.1-8B-Instruct,test/prealgebra/260.json,Prealgebra,general,A,0.0,0.95,B,0.6,B,0.65,A,0.7,B
Llama-3.1-8B-Instruct,test/prealgebra/1787.json,Prealgebra,general,A,1.0,0.8,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1044.json,Prealgebra,general,A,0.0,0.8,B,0.8,A,0.9,B,0.8,A
Llama-3.1-8B-Instruct,test/prealgebra/465.json,Prealgebra,general,A,1.0,0.95,A,0.6,B,0.7,A,0.7,B
Llama-3.1-8B-Instruct,test/prealgebra/1423.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,0.99,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/954.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1973.json,Prealgebra,general,A,1.0,0.85,A,0.98,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1730.json,Prealgebra,general,A,1.0,0.9,A,0.92,A,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1238.json,Prealgebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/1353.json,Prealgebra,general,B,1.0,0.8,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1187.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,1.0,A,0.8,A
Llama-3.1-8B-Instruct,test/prealgebra/1743.json,Prealgebra,general,A,0.0,0.9,B,1.0,B,0.95,B,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/1865.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1298.json,Prealgebra,general,B,1.0,0.9,B,0.85,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/2066.json,Prealgebra,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/631.json,Prealgebra,general,A,0.0,1.0,A,0.75,B,0.9,B,0.7,A
Llama-3.1-8B-Instruct,test/prealgebra/977.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/1991.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,0.98,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/1784.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,1.0,A,0.8,A
Llama-3.1-8B-Instruct,test/prealgebra/1572.json,Prealgebra,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/65.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1227.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.85,B,0.9,B
Llama-3.1-8B-Instruct,test/prealgebra/2019.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/1640.json,Prealgebra,general,B,0.0,0.9,A,0.9,A,0.9,A,1.0,B
Llama-3.1-8B-Instruct,test/prealgebra/2037.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/996.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/805.json,Prealgebra,general,A,0.0,0.9,B,0.7,B,0.6,A,0.7,B
Llama-3.1-8B-Instruct,test/prealgebra/914.json,Prealgebra,general,B,1.0,0.9,B,1.0,B,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/prealgebra/1114.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.8,A,0.9,B
Llama-3.1-8B-Instruct,test/prealgebra/846.json,Prealgebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/1930.json,Prealgebra,general,B,1.0,0.9,B,0.85,A,0.65,,,A
Llama-3.1-8B-Instruct,test/prealgebra/1252.json,Prealgebra,general,A,1.0,0.9,A,0.85,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/prealgebra/1203.json,Prealgebra,general,A,0.0,0.9,B,0.9,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/prealgebra/1128.json,Prealgebra,general,A,1.0,0.9,A,0.9,A,0.95,A,0.8,A
Llama-3.1-8B-Instruct,test/geometry/248.json,Geometry,general,B,1.0,0.8,B,0.65,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/geometry/434.json,Geometry,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/geometry/967.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/geometry/627.json,Geometry,general,A,0.0,0.9,B,0.6,B,0.85,B,0.7,A
Llama-3.1-8B-Instruct,test/geometry/178.json,Geometry,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,B
Llama-3.1-8B-Instruct,test/geometry/456.json,Geometry,general,A,1.0,0.95,B,0.85,A,0.95,A,0.9,B
Llama-3.1-8B-Instruct,test/geometry/353.json,Geometry,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/geometry/183.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.9,,,A
Llama-3.1-8B-Instruct,test/geometry/283.json,Geometry,general,A,0.0,0.9,B,0.7,A,0.9,B,0.7,A
Llama-3.1-8B-Instruct,test/geometry/1140.json,Geometry,general,B,1.0,0.9,B,0.85,B,0.98,,,A
Llama-3.1-8B-Instruct,test/geometry/172.json,Geometry,general,A,0.0,0.9,B,0.6,B,0.9,A,0.7,A
Llama-3.1-8B-Instruct,test/geometry/880.json,Geometry,general,A,0.0,0.9,A,0.6,B,0.85,,,B
Llama-3.1-8B-Instruct,test/geometry/802.json,Geometry,general,A,1.0,0.8,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/geometry/65.json,Geometry,general,A,1.0,0.9,A,0.85,A,0.7,A,0.4,A
Llama-3.1-8B-Instruct,test/geometry/702.json,Geometry,general,A,0.0,0.9,B,0.75,B,0.75,,,B
Llama-3.1-8B-Instruct,test/geometry/221.json,Geometry,general,B,0.0,0.8,A,0.75,A,0.9,,,A
Llama-3.1-8B-Instruct,test/geometry/547.json,Geometry,general,A,0.0,0.9,B,0.6,B,0.9,A,0.7,A
Llama-3.1-8B-Instruct,test/geometry/229.json,Geometry,general,A,1.0,0.9,B,0.8,A,0.85,A,0.7,B
Llama-3.1-8B-Instruct,test/geometry/254.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/geometry/473.json,Geometry,general,A,1.0,0.9,A,0.95,A,1.0,A,1.0,B
Llama-3.1-8B-Instruct,test/geometry/347.json,Geometry,general,A,0.0,0.9,B,0.95,B,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/geometry/483.json,Geometry,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/geometry/826.json,Geometry,general,A,0.0,0.9,B,0.75,B,0.9,,,A
Llama-3.1-8B-Instruct,test/geometry/226.json,Geometry,general,A,0.0,0.9,B,0.95,B,0.95,B,0.9,A
Llama-3.1-8B-Instruct,test/geometry/686.json,Geometry,general,B,0.0,0.9,A,0.95,A,0.98,A,1.0,B
Llama-3.1-8B-Instruct,test/geometry/1097.json,Geometry,general,A,1.0,1.0,A,1.0,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/geometry/965.json,Geometry,general,A,1.0,0.8,A,0.85,B,0.5,A,0.7,B
Llama-3.1-8B-Instruct,test/geometry/711.json,Geometry,general,A,1.0,0.9,A,0.7,A,0.85,A,0.7,A
Llama-3.1-8B-Instruct,test/geometry/1108.json,Geometry,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/geometry/947.json,Geometry,general,A,0.0,0.9,B,1.0,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/geometry/465.json,Geometry,general,A,1.0,1.0,A,0.65,A,0.85,A,0.6,A
Llama-3.1-8B-Instruct,test/geometry/73.json,Geometry,general,B,0.0,0.9,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/geometry/106.json,Geometry,general,A,1.0,0.9,A,0.95,A,0.9,A,0.95,B
Llama-3.1-8B-Instruct,test/geometry/846.json,Geometry,general,A,0.0,0.9,B,0.75,B,0.75,,,B
Llama-3.1-8B-Instruct,test/geometry/538.json,Geometry,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/geometry/795.json,Geometry,general,B,1.0,0.9,B,1.0,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/geometry/817.json,Geometry,general,A,0.0,0.95,B,0.75,B,0.95,,,A
Llama-3.1-8B-Instruct,test/geometry/843.json,Geometry,general,A,1.0,0.9,A,0.55,A,0.95,B,0.95,A
Llama-3.1-8B-Instruct,test/geometry/477.json,Geometry,general,A,0.0,0.9,B,0.85,B,0.9,B,0.9,A
Llama-3.1-8B-Instruct,test/geometry/561.json,Geometry,general,A,0.0,0.9,B,0.65,B,0.95,B,0.7,A
Llama-3.1-8B-Instruct,test/geometry/615.json,Geometry,general,A,0.0,0.95,A,0.75,B,0.85,B,0.7,B
Llama-3.1-8B-Instruct,test/counting_and_probability/525.json,Counting & Probability,general,B,1.0,0.9,B,0.6,B,0.9,B,0.8,B
Llama-3.1-8B-Instruct,test/counting_and_probability/666.json,Counting & Probability,general,A,1.0,0.9,A,1.0,A,0.98,A,1.0,A
Llama-3.1-8B-Instruct,test/counting_and_probability/134.json,Counting & Probability,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/counting_and_probability/119.json,Counting & Probability,general,B,0.0,0.95,A,0.95,A,0.98,A,0.8,A
Llama-3.1-8B-Instruct,test/counting_and_probability/1114.json,Counting & Probability,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/counting_and_probability/377.json,Counting & Probability,general,A,0.0,0.8,B,0.95,A,0.95,B,1.0,A
Llama-3.1-8B-Instruct,test/counting_and_probability/23957.json,Counting & Probability,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,B
Llama-3.1-8B-Instruct,test/counting_and_probability/1060.json,Counting & Probability,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/counting_and_probability/430.json,Counting & Probability,general,A,0.0,0.9,B,0.6,B,0.5,A,0.7,B
Llama-3.1-8B-Instruct,test/counting_and_probability/159.json,Counting & Probability,general,A,0.0,0.9,B,0.85,A,0.85,B,0.8,B
Llama-3.1-8B-Instruct,test/counting_and_probability/230.json,Counting & Probability,general,A,0.0,0.8,B,0.7,B,0.95,,,B
Llama-3.1-8B-Instruct,test/counting_and_probability/803.json,Counting & Probability,general,B,1.0,0.9,B,0.85,B,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/counting_and_probability/181.json,Counting & Probability,general,B,0.0,0.8,B,0.6,A,0.7,A,0.65,A
Llama-3.1-8B-Instruct,test/counting_and_probability/51.json,Counting & Probability,general,A,0.0,0.95,B,1.0,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/counting_and_probability/508.json,Counting & Probability,general,A,1.0,0.95,A,0.75,A,0.98,B,0.75,B
Llama-3.1-8B-Instruct,test/counting_and_probability/389.json,Counting & Probability,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Llama-3.1-8B-Instruct,test/counting_and_probability/765.json,Counting & Probability,general,A,0.0,0.8,B,0.6,B,0.6,B,0.75,A
Llama-3.1-8B-Instruct,test/counting_and_probability/282.json,Counting & Probability,general,B,0.0,0.9,B,0.6,A,0.95,,,A
Llama-3.1-8B-Instruct,test/counting_and_probability/71.json,Counting & Probability,general,B,1.0,0.8,B,1.0,B,0.95,B,1.0,A
Llama-3.1-8B-Instruct,test/counting_and_probability/894.json,Counting & Probability,general,A,1.0,0.9,B,0.85,A,0.7,A,0.7,A
Llama-3.1-8B-Instruct,test/counting_and_probability/1009.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/counting_and_probability/913.json,Counting & Probability,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/counting_and_probability/25149.json,Counting & Probability,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/counting_and_probability/339.json,Counting & Probability,general,A,0.0,0.8,B,0.75,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/counting_and_probability/870.json,Counting & Probability,general,B,1.0,0.95,B,0.85,B,0.9,B,0.9,A
Llama-3.1-8B-Instruct,test/counting_and_probability/216.json,Counting & Probability,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/counting_and_probability/737.json,Counting & Probability,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/counting_and_probability/116.json,Counting & Probability,general,A,1.0,0.8,A,1.0,A,1.0,A,1.0,A
Llama-3.1-8B-Instruct,test/counting_and_probability/238.json,Counting & Probability,general,A,1.0,1.0,A,0.75,A,0.95,,,A
Llama-3.1-8B-Instruct,test/counting_and_probability/1014.json,Counting & Probability,general,A,1.0,0.9,A,0.9,B,0.95,A,0.95,B
Llama-3.1-8B-Instruct,test/counting_and_probability/14.json,Counting & Probability,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Llama-3.1-8B-Instruct,test/counting_and_probability/188.json,Counting & Probability,general,A,1.0,0.9,B,0.7,A,0.9,A,0.6,A
Llama-3.1-8B-Instruct,test/counting_and_probability/761.json,Counting & Probability,general,A,1.0,0.85,A,0.95,A,0.95,A,1.0,A
Llama-3.1-8B-Instruct,test/counting_and_probability/10.json,Counting & Probability,general,B,0.0,0.95,B,0.65,A,0.7,,,A
Llama-3.1-8B-Instruct,test/counting_and_probability/731.json,Counting & Probability,general,B,1.0,0.9,B,0.6,B,0.85,,,B
Llama-3.1-8B-Instruct,test/counting_and_probability/190.json,Counting & Probability,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,B
Llama-3.1-8B-Instruct,test/counting_and_probability/1003.json,Counting & Probability,general,B,1.0,0.9,B,0.65,B,0.85,,,A
Llama-3.1-8B-Instruct,test/counting_and_probability/199.json,Counting & Probability,general,A,0.0,0.8,B,0.95,B,0.85,,,B
OpenThinker2-7B,test/precalculus/807.json,Precalculus,general,B,0.0,1.0,A,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/927.json,Precalculus,general,A,0.0,1.0,A,0.85,B,0.98,,,B
OpenThinker2-7B,test/precalculus/1303.json,Precalculus,general,B,1.0,1.0,A,1.0,B,0.95,B,0.95,A
OpenThinker2-7B,test/precalculus/990.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/precalculus/1199.json,Precalculus,general,B,0.0,0.75,A,0.6,A,0.7,B,0.9,A
OpenThinker2-7B,test/precalculus/779.json,Precalculus,general,A,0.0,1.0,B,0.9,B,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/285.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/precalculus/1105.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,B
OpenThinker2-7B,test/precalculus/675.json,Precalculus,general,Tie,0.5,0.5,A,1.0,A,0.95,A,1.0,B
OpenThinker2-7B,test/precalculus/1146.json,Precalculus,general,B,1.0,1.0,B,0.6,B,0.95,A,1.0,A
OpenThinker2-7B,test/precalculus/1313.json,Precalculus,general,A,1.0,0.75,B,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/24313.json,Precalculus,general,A,0.0,1.0,B,0.9,B,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/34.json,Precalculus,general,A,1.0,1.0,B,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/1300.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
OpenThinker2-7B,test/precalculus/44.json,Precalculus,general,B,0.0,1.0,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/477.json,Precalculus,general,A,0.0,1.0,B,0.95,B,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/43.json,Precalculus,general,A,0.0,1.0,A,0.85,B,0.95,,,B
OpenThinker2-7B,test/precalculus/986.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/117.json,Precalculus,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/697.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/659.json,Precalculus,general,A,0.0,0.95,B,1.0,B,0.95,A,0.95,B
OpenThinker2-7B,test/precalculus/263.json,Precalculus,general,Tie,0.5,0.5,A,0.55,A,0.98,,,A
OpenThinker2-7B,test/precalculus/541.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.98,A,1.0,B
OpenThinker2-7B,test/precalculus/190.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/precalculus/819.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,B
OpenThinker2-7B,test/precalculus/1056.json,Precalculus,general,Tie,0.5,0.5,B,0.9,B,0.95,B,1.0,B
OpenThinker2-7B,test/precalculus/441.json,Precalculus,general,A,1.0,1.0,A,0.55,A,0.95,,,B
OpenThinker2-7B,test/precalculus/989.json,Precalculus,general,B,1.0,0.95,B,0.75,A,0.85,B,0.95,A
OpenThinker2-7B,test/precalculus/920.json,Precalculus,general,A,1.0,1.0,A,0.8,A,0.95,A,1.0,A
OpenThinker2-7B,test/precalculus/452.json,Precalculus,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/580.json,Precalculus,general,A,1.0,1.0,A,0.5,A,0.95,A,1.0,A
OpenThinker2-7B,test/precalculus/768.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/precalculus/1172.json,Precalculus,general,A,0.0,1.0,B,0.85,B,0.95,,,B
OpenThinker2-7B,test/precalculus/1201.json,Precalculus,general,B,1.0,1.0,B,0.85,B,0.85,B,0.7,A
OpenThinker2-7B,test/precalculus/881.json,Precalculus,general,A,1.0,1.0,A,0.9,A,0.95,,,A
OpenThinker2-7B,test/precalculus/695.json,Precalculus,general,A,0.0,1.0,B,0.85,B,0.95,,,A
OpenThinker2-7B,test/precalculus/742.json,Precalculus,general,A,0.0,1.0,B,0.95,B,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/801.json,Precalculus,general,A,0.0,0.95,B,0.6,B,0.9,B,0.95,A
OpenThinker2-7B,test/precalculus/826.json,Precalculus,general,A,1.0,0.95,B,0.9,A,0.95,,,A
OpenThinker2-7B,test/precalculus/1281.json,Precalculus,general,A,0.0,0.9,B,0.55,B,0.9,B,0.95,B
OpenThinker2-7B,test/precalculus/96.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/1289.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.98,A,1.0,A
OpenThinker2-7B,test/precalculus/902.json,Precalculus,general,B,0.0,0.95,A,0.7,B,0.95,A,0.6,A
OpenThinker2-7B,test/precalculus/1291.json,Precalculus,general,Tie,0.5,0.5,A,0.6,B,0.98,,,A
OpenThinker2-7B,test/precalculus/398.json,Precalculus,general,A,0.0,1.0,B,0.95,B,0.95,,,A
OpenThinker2-7B,test/precalculus/681.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/precalculus/145.json,Precalculus,general,Tie,0.5,0.5,B,0.75,A,0.95,A,0.95,B
OpenThinker2-7B,test/precalculus/625.json,Precalculus,general,A,0.0,1.0,B,0.9,B,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/1202.json,Precalculus,general,B,1.0,0.95,B,0.9,B,0.95,B,0.95,B
OpenThinker2-7B,test/precalculus/1133.json,Precalculus,general,A,0.0,1.0,B,0.85,B,0.95,,,B
OpenThinker2-7B,test/precalculus/499.json,Precalculus,general,A,0.0,0.95,A,0.85,B,0.95,,,A
OpenThinker2-7B,test/precalculus/323.json,Precalculus,general,A,1.0,1.0,A,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/precalculus/703.json,Precalculus,general,B,1.0,1.0,A,0.85,B,0.95,,,B
OpenThinker2-7B,test/precalculus/1252.json,Precalculus,general,A,1.0,0.9,B,0.9,A,0.95,,,B
OpenThinker2-7B,test/precalculus/1082.json,Precalculus,general,B,1.0,1.0,B,1.0,B,0.98,B,1.0,B
OpenThinker2-7B,test/precalculus/356.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.98,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/1994.json,Intermediate Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/1197.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,A,0.6,A,0.8,A
OpenThinker2-7B,test/intermediate_algebra/134.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1000.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/607.json,Intermediate Algebra,general,B,1.0,1.0,B,0.85,B,0.98,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1388.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,B,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/428.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1454.json,Intermediate Algebra,general,B,1.0,0.9,B,0.85,B,0.85,B,0.6,A
OpenThinker2-7B,test/intermediate_algebra/1217.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1168.json,Intermediate Algebra,general,A,1.0,1.0,B,0.85,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/956.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
OpenThinker2-7B,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/279.json,Intermediate Algebra,general,A,1.0,1.0,B,0.85,A,0.98,,,A
OpenThinker2-7B,test/intermediate_algebra/207.json,Intermediate Algebra,general,A,1.0,1.0,A,0.98,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/623.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/47.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/1849.json,Intermediate Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/2046.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/662.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.65,B,0.85,B,0.95,A
OpenThinker2-7B,test/intermediate_algebra/582.json,Intermediate Algebra,general,B,0.0,1.0,B,0.85,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/431.json,Intermediate Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/558.json,Intermediate Algebra,general,A,1.0,0.5,A,0.9,A,0.75,A,0.7,A
OpenThinker2-7B,test/intermediate_algebra/362.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/515.json,Intermediate Algebra,general,B,0.0,1.0,A,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/894.json,Intermediate Algebra,general,A,1.0,0.9,A,0.98,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/345.json,Intermediate Algebra,general,B,1.0,1.0,B,0.85,B,0.95,B,0.95,A
OpenThinker2-7B,test/intermediate_algebra/1898.json,Intermediate Algebra,general,A,1.0,1.0,A,0.6,A,0.95,A,1.0,A
OpenThinker2-7B,test/intermediate_algebra/232.json,Intermediate Algebra,general,A,1.0,0.95,A,0.8,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/128.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/1063.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.9,B,0.9,B,0.9,A
OpenThinker2-7B,test/intermediate_algebra/1126.json,Intermediate Algebra,general,A,1.0,1.0,B,0.55,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/2022.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1151.json,Intermediate Algebra,general,B,0.0,1.0,A,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,A,0.92,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/966.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,B
OpenThinker2-7B,test/intermediate_algebra/964.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/1410.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/991.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/183.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.92,A,0.9,A
OpenThinker2-7B,test/intermediate_algebra/1422.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/2196.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
OpenThinker2-7B,test/intermediate_algebra/591.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,B,0.98,A,1.0,A
OpenThinker2-7B,test/intermediate_algebra/1555.json,Intermediate Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,1.0,B
OpenThinker2-7B,test/intermediate_algebra/1510.json,Intermediate Algebra,general,B,1.0,0.95,B,0.7,B,0.92,A,0.7,A
OpenThinker2-7B,test/intermediate_algebra/102.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,1.0,A
OpenThinker2-7B,test/intermediate_algebra/986.json,Intermediate Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/1354.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1837.json,Intermediate Algebra,general,B,0.0,1.0,A,0.6,B,0.99,A,1.0,A
OpenThinker2-7B,test/intermediate_algebra/337.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/1210.json,Intermediate Algebra,general,A,1.0,0.9,A,0.95,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/1123.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/149.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.98,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1411.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
OpenThinker2-7B,test/intermediate_algebra/960.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.7,B,0.8,A
OpenThinker2-7B,test/intermediate_algebra/1300.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,B,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/90.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.98,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/754.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
OpenThinker2-7B,test/intermediate_algebra/446.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.92,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/1544.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.7,A,0.85,A,0.7,B
OpenThinker2-7B,test/intermediate_algebra/1714.json,Intermediate Algebra,general,A,1.0,0.98,A,0.75,B,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/2152.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/117.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,B
OpenThinker2-7B,test/intermediate_algebra/190.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,B,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/776.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.92,B,0.9,B
OpenThinker2-7B,test/intermediate_algebra/1566.json,Intermediate Algebra,general,B,0.0,1.0,A,0.9,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/1572.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/1166.json,Intermediate Algebra,general,B,1.0,1.0,B,0.9,B,0.9,B,0.95,A
OpenThinker2-7B,test/intermediate_algebra/860.json,Intermediate Algebra,general,B,0.0,1.0,A,0.95,B,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1407.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/1405.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/690.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/955.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1992.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/1111.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1791.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,B,0.98,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1806.json,Intermediate Algebra,general,B,0.0,1.0,A,0.95,A,0.95,A,1.0,A
OpenThinker2-7B,test/intermediate_algebra/1797.json,Intermediate Algebra,general,A,1.0,1.0,A,0.98,A,0.95,A,1.0,A
OpenThinker2-7B,test/intermediate_algebra/2146.json,Intermediate Algebra,general,A,1.0,1.0,B,0.95,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/2015.json,Intermediate Algebra,general,B,0.0,0.85,A,0.55,B,0.6,A,0.7,B
OpenThinker2-7B,test/intermediate_algebra/121.json,Intermediate Algebra,general,A,0.0,0.95,A,0.85,B,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/1014.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1462.json,Intermediate Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,A
OpenThinker2-7B,test/intermediate_algebra/199.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1779.json,Intermediate Algebra,general,B,0.0,0.95,A,0.7,B,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1102.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/834.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/158.json,Intermediate Algebra,general,B,0.0,0.95,A,0.95,B,0.98,A,1.0,A
OpenThinker2-7B,test/intermediate_algebra/752.json,Intermediate Algebra,general,A,1.0,1.0,A,0.9,A,0.98,,,B
OpenThinker2-7B,test/intermediate_algebra/1279.json,Intermediate Algebra,general,A,1.0,0.95,A,0.75,B,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1467.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.92,B,1.0,B
OpenThinker2-7B,test/intermediate_algebra/101.json,Intermediate Algebra,general,A,1.0,0.99,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/1365.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/intermediate_algebra/1350.json,Intermediate Algebra,general,B,0.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/intermediate_algebra/1930.json,Intermediate Algebra,general,B,1.0,0.85,B,0.85,A,0.95,B,0.95,A
OpenThinker2-7B,test/intermediate_algebra/1981.json,Intermediate Algebra,general,Tie,0.5,0.5,A,1.0,A,0.95,A,0.95,A
OpenThinker2-7B,test/intermediate_algebra/1232.json,Intermediate Algebra,general,A,1.0,1.0,A,0.55,A,0.95,,,A
OpenThinker2-7B,test/intermediate_algebra/1508.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
OpenThinker2-7B,test/algebra/2584.json,Algebra,general,A,0.0,0.99,B,0.95,B,0.95,B,0.95,A
OpenThinker2-7B,test/algebra/1349.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,,,B
OpenThinker2-7B,test/algebra/2036.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
OpenThinker2-7B,test/algebra/1098.json,Algebra,general,B,0.0,0.95,A,1.0,A,0.98,A,1.0,B
OpenThinker2-7B,test/algebra/1837.json,Algebra,general,A,0.0,1.0,A,0.6,B,0.95,,,B
OpenThinker2-7B,test/algebra/2193.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/2427.json,Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/1072.json,Algebra,general,A,1.0,1.0,B,0.85,A,0.98,A,0.95,A
OpenThinker2-7B,test/algebra/24.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
OpenThinker2-7B,test/algebra/2214.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,1.0,B
OpenThinker2-7B,test/algebra/305.json,Algebra,general,B,0.0,1.0,A,0.9,A,0.98,A,0.95,B
OpenThinker2-7B,test/algebra/1265.json,Algebra,general,A,1.0,0.95,B,0.92,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/187.json,Algebra,general,A,1.0,1.0,A,0.85,B,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/769.json,Algebra,general,B,0.0,1.0,A,0.95,A,0.95,A,1.0,B
OpenThinker2-7B,test/algebra/722.json,Algebra,general,B,0.0,1.0,A,0.95,A,0.95,,,A
OpenThinker2-7B,test/algebra/2046.json,Algebra,general,A,1.0,0.95,A,0.95,B,0.98,A,0.95,A
OpenThinker2-7B,test/algebra/2253.json,Algebra,general,A,1.0,0.98,A,0.95,B,0.9,,,B
OpenThinker2-7B,test/algebra/1004.json,Algebra,general,A,1.0,0.99,A,0.9,B,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/1035.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/2700.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/893.json,Algebra,general,B,1.0,1.0,B,0.9,B,0.98,A,1.0,A
OpenThinker2-7B,test/algebra/567.json,Algebra,general,A,1.0,1.0,B,0.9,A,0.95,,,A
OpenThinker2-7B,test/algebra/892.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,,,A
OpenThinker2-7B,test/algebra/2023.json,Algebra,general,Tie,0.5,0.5,A,0.55,A,0.95,A,1.0,B
OpenThinker2-7B,test/algebra/873.json,Algebra,general,B,1.0,1.0,B,0.9,B,0.98,B,1.0,A
OpenThinker2-7B,test/algebra/2058.json,Algebra,general,A,1.0,0.95,B,0.55,A,0.98,,,B
OpenThinker2-7B,test/algebra/2593.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.98,,,A
OpenThinker2-7B,test/algebra/2157.json,Algebra,general,A,1.0,0.95,A,0.95,B,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/2251.json,Algebra,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/1332.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/972.json,Algebra,general,A,0.0,1.0,A,0.85,B,0.95,,,B
OpenThinker2-7B,test/algebra/2232.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,1.0,A
OpenThinker2-7B,test/algebra/661.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/246.json,Algebra,general,B,0.0,1.0,A,0.9,A,0.95,,,B
OpenThinker2-7B,test/algebra/1519.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/988.json,Algebra,general,A,1.0,0.98,A,0.95,A,0.95,A,1.0,B
OpenThinker2-7B,test/algebra/2570.json,Algebra,general,Tie,0.5,0.5,B,0.92,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/621.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/1255.json,Algebra,general,B,1.0,0.99,B,0.85,B,0.95,,,A
OpenThinker2-7B,test/algebra/2517.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/478.json,Algebra,general,B,1.0,1.0,B,0.95,A,0.95,B,0.95,A
OpenThinker2-7B,test/algebra/297.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.85,B,1.0,A
OpenThinker2-7B,test/algebra/841.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/686.json,Algebra,general,Tie,0.5,0.5,A,0.95,B,0.95,,,A
OpenThinker2-7B,test/algebra/351.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/1275.json,Algebra,general,B,0.0,0.95,A,0.65,A,0.95,,,A
OpenThinker2-7B,test/algebra/1082.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/1214.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,B,0.95,A
OpenThinker2-7B,test/algebra/2199.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/733.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/algebra/109.json,Algebra,general,B,0.0,1.0,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/1937.json,Algebra,general,B,0.0,1.0,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/291.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/algebra/2102.json,Algebra,general,A,0.0,0.98,B,0.95,B,0.98,A,0.95,A
OpenThinker2-7B,test/algebra/907.json,Algebra,general,A,1.0,1.0,B,0.85,A,0.98,A,1.0,B
OpenThinker2-7B,test/algebra/864.json,Algebra,general,A,1.0,0.99,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/2159.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.95,,,A
OpenThinker2-7B,test/algebra/1578.json,Algebra,general,A,1.0,1.0,B,0.85,A,0.95,,,A
OpenThinker2-7B,test/algebra/975.json,Algebra,general,A,0.0,0.5,B,0.9,B,0.95,,,A
OpenThinker2-7B,test/algebra/1143.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/2626.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
OpenThinker2-7B,test/algebra/1787.json,Algebra,general,B,0.0,1.0,A,0.85,A,0.98,A,0.95,B
OpenThinker2-7B,test/algebra/1934.json,Algebra,general,Tie,0.5,0.5,A,0.8,B,0.98,A,0.95,B
OpenThinker2-7B,test/algebra/2064.json,Algebra,general,A,0.0,0.9,B,0.9,B,0.98,A,0.9,A
OpenThinker2-7B,test/algebra/694.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.98,A,1.0,B
OpenThinker2-7B,test/algebra/524.json,Algebra,general,A,1.0,1.0,A,0.8,A,0.95,B,0.95,A
OpenThinker2-7B,test/algebra/2551.json,Algebra,general,A,1.0,1.0,A,0.95,B,0.95,A,1.0,A
OpenThinker2-7B,test/algebra/346.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/1282.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/1184.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/634.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,B
OpenThinker2-7B,test/algebra/2486.json,Algebra,general,A,0.0,0.95,B,1.0,B,0.98,B,1.0,B
OpenThinker2-7B,test/algebra/2257.json,Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/1842.json,Algebra,general,A,1.0,1.0,A,0.75,A,0.95,A,1.0,A
OpenThinker2-7B,test/algebra/791.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/276.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,A,0.8,A
OpenThinker2-7B,test/algebra/2735.json,Algebra,general,B,1.0,1.0,B,0.95,A,0.95,B,0.95,A
OpenThinker2-7B,test/algebra/425.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,B
OpenThinker2-7B,test/algebra/1936.json,Algebra,general,A,1.0,0.99,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/2176.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/509.json,Algebra,general,A,1.0,1.0,B,0.85,A,0.95,,,A
OpenThinker2-7B,test/algebra/1457.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,,,A
OpenThinker2-7B,test/algebra/2592.json,Algebra,general,A,1.0,1.0,B,0.92,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/858.json,Algebra,general,Tie,0.5,0.5,A,0.92,A,0.95,,,B
OpenThinker2-7B,test/algebra/1529.json,Algebra,general,B,0.0,0.95,B,0.95,A,0.95,,,A
OpenThinker2-7B,test/algebra/1338.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
OpenThinker2-7B,test/algebra/1547.json,Algebra,general,A,0.0,1.0,A,0.85,B,0.95,,,A
OpenThinker2-7B,test/algebra/529.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/1078.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,,,B
OpenThinker2-7B,test/algebra/251.json,Algebra,general,A,1.0,0.99,B,0.95,A,0.95,,,A
OpenThinker2-7B,test/algebra/1199.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,,,B
OpenThinker2-7B,test/algebra/2264.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,,,A
OpenThinker2-7B,test/algebra/1303.json,Algebra,general,B,0.0,1.0,A,0.95,A,0.98,A,0.95,B
OpenThinker2-7B,test/algebra/101.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.98,B,0.95,A
OpenThinker2-7B,test/algebra/170.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.98,B,0.95,A
OpenThinker2-7B,test/algebra/849.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/algebra/1031.json,Algebra,general,B,0.0,1.0,B,0.55,A,0.95,A,1.0,A
OpenThinker2-7B,test/algebra/853.json,Algebra,general,A,0.0,1.0,A,0.85,B,0.98,,,A
OpenThinker2-7B,test/algebra/2277.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/518.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/114.json,Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/1960.json,Algebra,general,A,1.0,0.98,A,0.85,A,0.98,A,0.95,B
OpenThinker2-7B,test/algebra/2680.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/2391.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/algebra/776.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/1796.json,Algebra,general,A,0.0,1.0,B,0.92,B,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/1339.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.98,A,0.95,B
OpenThinker2-7B,test/algebra/2743.json,Algebra,general,B,1.0,1.0,B,0.85,B,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/2043.json,Algebra,general,Tie,0.5,0.5,B,0.98,B,0.95,B,0.95,B
OpenThinker2-7B,test/algebra/1553.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,1.0,A
OpenThinker2-7B,test/algebra/2080.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,,,A
OpenThinker2-7B,test/algebra/1343.json,Algebra,general,A,1.0,0.98,B,0.55,A,0.95,A,1.0,A
OpenThinker2-7B,test/algebra/668.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
OpenThinker2-7B,test/algebra/2430.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/2789.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.98,A,0.95,B
OpenThinker2-7B,test/algebra/1814.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
OpenThinker2-7B,test/algebra/2476.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/algebra/2780.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,A
OpenThinker2-7B,test/algebra/824.json,Algebra,general,A,1.0,1.0,A,0.8,A,0.95,,,A
OpenThinker2-7B,test/algebra/1425.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/224.json,Algebra,general,B,0.0,1.0,A,0.9,A,0.95,,,B
OpenThinker2-7B,test/algebra/435.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.98,,,A
OpenThinker2-7B,test/algebra/2470.json,Algebra,general,A and B,0.0,0.5,B,0.95,B,0.95,A,0.95,A
OpenThinker2-7B,test/algebra/2779.json,Algebra,general,B,0.0,0.95,A,0.9,A,0.85,A,0.95,A
OpenThinker2-7B,test/number_theory/572.json,Number Theory,general,B,0.0,0.95,A,0.9,A,0.95,,,B
OpenThinker2-7B,test/number_theory/515.json,Number Theory,general,B,0.0,1.0,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/number_theory/1032.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/737.json,Number Theory,general,B,0.0,1.0,A,0.92,A,0.95,,,B
OpenThinker2-7B,test/number_theory/864.json,Number Theory,general,A,0.0,0.95,A,0.85,B,0.95,,,A
OpenThinker2-7B,test/number_theory/627.json,Number Theory,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/45.json,Number Theory,general,B,0.0,1.0,B,0.92,A,0.95,,,A
OpenThinker2-7B,test/number_theory/1055.json,Number Theory,general,Tie,0.5,0.5,B,0.9,B,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/46.json,Number Theory,general,A,0.0,1.0,A,0.9,B,0.95,,,B
OpenThinker2-7B,test/number_theory/516.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/number_theory/357.json,Number Theory,general,B,1.0,1.0,A,0.9,B,0.95,,,B
OpenThinker2-7B,test/number_theory/914.json,Number Theory,general,B,0.0,1.0,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/number_theory/847.json,Number Theory,general,B,0.0,0.99,A,0.95,B,0.95,,,B
OpenThinker2-7B,test/number_theory/753.json,Number Theory,general,B,1.0,1.0,B,1.0,B,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/1257.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/number_theory/156.json,Number Theory,general,A,1.0,1.0,B,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/612.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/931.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/number_theory/521.json,Number Theory,general,B,0.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/598.json,Number Theory,general,A,1.0,0.95,A,0.9,B,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/978.json,Number Theory,general,A,0.0,1.0,B,0.9,B,0.95,A,1.0,A
OpenThinker2-7B,test/number_theory/838.json,Number Theory,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
OpenThinker2-7B,test/number_theory/149.json,Number Theory,general,B,0.0,1.0,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/1201.json,Number Theory,general,A,1.0,1.0,B,0.85,A,0.95,,,A
OpenThinker2-7B,test/number_theory/234.json,Number Theory,general,A,0.0,1.0,B,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/417.json,Number Theory,general,B,1.0,1.0,B,0.9,A,0.95,B,0.9,B
OpenThinker2-7B,test/number_theory/89.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/number_theory/183.json,Number Theory,general,A,1.0,1.0,A,1.0,A,0.95,,,A
OpenThinker2-7B,test/number_theory/1065.json,Number Theory,general,A,0.0,0.95,A,0.9,B,0.95,,,B
OpenThinker2-7B,test/number_theory/466.json,Number Theory,general,A,0.0,1.0,B,0.95,B,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/634.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/533.json,Number Theory,general,Tie,0.5,0.5,B,0.85,B,0.95,,,A
OpenThinker2-7B,test/number_theory/691.json,Number Theory,general,A,1.0,1.0,B,0.9,A,0.95,A,1.0,A
OpenThinker2-7B,test/number_theory/1287.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/631.json,Number Theory,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
OpenThinker2-7B,test/number_theory/488.json,Number Theory,general,B,0.0,1.0,B,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/1172.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,,,A
OpenThinker2-7B,test/number_theory/203.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/911.json,Number Theory,general,A,1.0,0.9,A,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/483.json,Number Theory,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/368.json,Number Theory,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/686.json,Number Theory,general,A,1.0,1.0,A,0.85,B,0.95,A,1.0,B
OpenThinker2-7B,test/number_theory/820.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
OpenThinker2-7B,test/number_theory/109.json,Number Theory,general,A,1.0,1.0,A,0.85,B,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/427.json,Number Theory,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/1185.json,Number Theory,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/928.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/number_theory/132.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.98,,,B
OpenThinker2-7B,test/number_theory/769.json,Number Theory,general,A,0.0,0.85,B,0.95,B,0.98,A,0.95,A
OpenThinker2-7B,test/number_theory/1002.json,Number Theory,general,Tie,0.5,0.5,B,0.9,A,0.98,A,1.0,B
OpenThinker2-7B,test/number_theory/410.json,Number Theory,general,B,0.0,1.0,A,0.85,B,0.92,A,0.9,B
OpenThinker2-7B,test/number_theory/255.json,Number Theory,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/1000.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/13.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/459.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/342.json,Number Theory,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/679.json,Number Theory,general,Tie,0.5,0.5,A,0.85,A,0.95,,,B
OpenThinker2-7B,test/number_theory/72.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/number_theory/22.json,Number Theory,general,Tie,0.5,0.5,A,0.9,B,0.95,,,B
OpenThinker2-7B,test/number_theory/1128.json,Number Theory,general,A,0.0,1.0,B,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/number_theory/1090.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,,,A
OpenThinker2-7B,test/number_theory/239.json,Number Theory,general,B,0.0,1.0,A,0.95,A,0.98,A,0.95,B
OpenThinker2-7B,test/prealgebra/1622.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/prealgebra/1139.json,Prealgebra,general,Tie,0.5,0.5,A,0.7,B,0.92,B,0.95,B
OpenThinker2-7B,test/prealgebra/1840.json,Prealgebra,general,B,0.0,1.0,B,0.95,A,0.95,,,A
OpenThinker2-7B,test/prealgebra/1302.json,Prealgebra,general,A,1.0,0.99,A,0.9,B,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/930.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1558.json,Prealgebra,general,A,1.0,1.0,A,0.95,B,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1388.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.9,A,1.0,B
OpenThinker2-7B,test/prealgebra/951.json,Prealgebra,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/572.json,Prealgebra,general,A,0.0,1.0,B,0.85,B,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1247.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.98,,,B
OpenThinker2-7B,test/prealgebra/1747.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/1233.json,Prealgebra,general,A,1.0,1.0,A,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/192.json,Prealgebra,general,A,0.0,1.0,B,0.9,B,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/307.json,Prealgebra,general,A,1.0,0.99,B,0.95,A,0.9,A,0.95,A
OpenThinker2-7B,test/prealgebra/1761.json,Prealgebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/1646.json,Prealgebra,general,A,0.0,0.81,B,0.75,B,0.92,A,0.7,A
OpenThinker2-7B,test/prealgebra/105.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/1924.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.98,A,1.0,B
OpenThinker2-7B,test/prealgebra/1804.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/1733.json,Prealgebra,general,A,1.0,1.0,B,0.7,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/505.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1686.json,Prealgebra,general,Tie,0.5,0.5,A,0.85,B,0.95,A,1.0,A
OpenThinker2-7B,test/prealgebra/1807.json,Prealgebra,general,A,1.0,1.0,B,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/1297.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1655.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.98,A,0.95,B
OpenThinker2-7B,test/prealgebra/1356.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,A
OpenThinker2-7B,test/prealgebra/1003.json,Prealgebra,general,Tie,0.5,0.5,B,0.65,B,0.95,B,0.95,A
OpenThinker2-7B,test/prealgebra/1272.json,Prealgebra,general,A,1.0,1.0,A,0.95,B,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1113.json,Prealgebra,general,B,0.0,1.0,A,0.95,A,0.98,A,0.95,B
OpenThinker2-7B,test/prealgebra/1908.json,Prealgebra,general,B,0.0,1.0,A,1.0,A,0.95,,,A
OpenThinker2-7B,test/prealgebra/1922.json,Prealgebra,general,A,0.0,0.95,A,0.95,B,0.95,B,1.0,B
OpenThinker2-7B,test/prealgebra/1907.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
OpenThinker2-7B,test/prealgebra/2086.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/378.json,Prealgebra,general,A,1.0,0.95,B,0.8,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/1555.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.98,A,0.95,A
OpenThinker2-7B,test/prealgebra/1436.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.98,A,1.0,B
OpenThinker2-7B,test/prealgebra/1961.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
OpenThinker2-7B,test/prealgebra/2057.json,Prealgebra,general,A,1.0,1.0,A,0.92,A,0.95,A,1.0,B
OpenThinker2-7B,test/prealgebra/153.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,B
OpenThinker2-7B,test/prealgebra/874.json,Prealgebra,general,B,1.0,0.95,B,0.6,B,0.85,B,0.9,A
OpenThinker2-7B,test/prealgebra/1251.json,Prealgebra,general,A,1.0,0.95,A,0.8,A,0.95,,,A
OpenThinker2-7B,test/prealgebra/1458.json,Prealgebra,general,B,1.0,0.98,B,0.95,A,0.95,B,0.95,A
OpenThinker2-7B,test/prealgebra/1995.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1317.json,Prealgebra,general,A,1.0,1.0,A,0.95,B,0.95,A,1.0,B
OpenThinker2-7B,test/prealgebra/1742.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/993.json,Prealgebra,general,Tie,0.5,0.5,B,0.85,B,0.98,A,0.95,B
OpenThinker2-7B,test/prealgebra/1834.json,Prealgebra,general,A,1.0,0.98,A,0.9,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/1512.json,Prealgebra,general,A,1.0,1.0,A,0.9,A,0.95,A,1.0,B
OpenThinker2-7B,test/prealgebra/260.json,Prealgebra,general,B,1.0,0.95,B,0.75,B,0.95,B,0.9,B
OpenThinker2-7B,test/prealgebra/1787.json,Prealgebra,general,A,1.0,0.95,A,0.6,B,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/1044.json,Prealgebra,general,A,0.0,0.95,A,0.6,B,0.75,B,0.95,B
OpenThinker2-7B,test/prealgebra/465.json,Prealgebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,A
OpenThinker2-7B,test/prealgebra/1423.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/prealgebra/954.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.98,A,0.95,B
OpenThinker2-7B,test/prealgebra/1973.json,Prealgebra,general,A,1.0,1.0,B,0.9,A,0.92,A,0.95,A
OpenThinker2-7B,test/prealgebra/1730.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1238.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,,,A
OpenThinker2-7B,test/prealgebra/1353.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.98,A,0.95,A
OpenThinker2-7B,test/prealgebra/1187.json,Prealgebra,general,A,0.0,0.5,B,0.95,B,0.95,B,0.85,A
OpenThinker2-7B,test/prealgebra/1743.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.95,,,A
OpenThinker2-7B,test/prealgebra/1865.json,Prealgebra,general,B,1.0,1.0,B,0.95,B,0.95,B,1.0,B
OpenThinker2-7B,test/prealgebra/1298.json,Prealgebra,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/2066.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/631.json,Prealgebra,general,A,0.0,1.0,B,0.75,A,0.95,B,0.95,B
OpenThinker2-7B,test/prealgebra/977.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1991.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.98,A,0.95,B
OpenThinker2-7B,test/prealgebra/1784.json,Prealgebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,B
OpenThinker2-7B,test/prealgebra/1572.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.98,B,0.95,A
OpenThinker2-7B,test/prealgebra/65.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.95,,,A
OpenThinker2-7B,test/prealgebra/1227.json,Prealgebra,general,B,1.0,1.0,B,0.95,B,0.95,B,0.95,A
OpenThinker2-7B,test/prealgebra/2019.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/1640.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,,,A
OpenThinker2-7B,test/prealgebra/2037.json,Prealgebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/996.json,Prealgebra,general,A,1.0,1.0,B,0.6,A,0.95,A,0.95,A
OpenThinker2-7B,test/prealgebra/805.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.95,,,B
OpenThinker2-7B,test/prealgebra/914.json,Prealgebra,general,Tie,0.5,0.5,B,0.7,B,0.65,A,0.65,B
OpenThinker2-7B,test/prealgebra/1114.json,Prealgebra,general,A,0.0,1.0,A,0.9,B,0.98,B,0.95,A
OpenThinker2-7B,test/prealgebra/846.json,Prealgebra,general,B,0.0,1.0,A,0.95,A,0.98,,,B
OpenThinker2-7B,test/prealgebra/1930.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,0.95,A,1.0,A
OpenThinker2-7B,test/prealgebra/1252.json,Prealgebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.9,A
OpenThinker2-7B,test/prealgebra/1203.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.98,A,1.0,B
OpenThinker2-7B,test/prealgebra/1128.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
OpenThinker2-7B,test/geometry/248.json,Geometry,general,A,0.0,1.0,B,0.95,B,0.95,,,B
OpenThinker2-7B,test/geometry/434.json,Geometry,general,Tie,0.5,0.5,B,0.55,B,0.95,B,0.95,A
OpenThinker2-7B,test/geometry/967.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
OpenThinker2-7B,test/geometry/627.json,Geometry,general,B,1.0,1.0,B,0.75,B,0.9,A,0.95,B
OpenThinker2-7B,test/geometry/178.json,Geometry,general,Tie,0.5,0.5,A,0.85,A,0.95,A,1.0,A
OpenThinker2-7B,test/geometry/456.json,Geometry,general,A,0.0,1.0,A,0.9,B,0.95,B,0.95,B
OpenThinker2-7B,test/geometry/353.json,Geometry,general,A,1.0,1.0,A,0.55,A,0.95,A,0.9,A
OpenThinker2-7B,test/geometry/183.json,Geometry,general,A,1.0,0.85,A,0.85,B,0.9,A,0.9,A
OpenThinker2-7B,test/geometry/283.json,Geometry,general,Tie,0.5,0.5,B,0.85,B,0.95,A,0.95,B
OpenThinker2-7B,test/geometry/1140.json,Geometry,general,A,1.0,1.0,A,0.8,B,0.95,A,1.0,B
OpenThinker2-7B,test/geometry/172.json,Geometry,general,B,1.0,0.85,B,0.85,B,0.98,A,0.9,B
OpenThinker2-7B,test/geometry/880.json,Geometry,general,A,0.0,0.95,A,0.8,B,0.9,,,B
OpenThinker2-7B,test/geometry/802.json,Geometry,general,A,1.0,1.0,B,0.85,A,0.95,A,1.0,A
OpenThinker2-7B,test/geometry/65.json,Geometry,general,A,0.0,1.0,A,0.85,B,0.95,B,0.9,A
OpenThinker2-7B,test/geometry/702.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.98,A,1.0,B
OpenThinker2-7B,test/geometry/221.json,Geometry,general,A,0.0,1.0,B,0.85,B,0.98,,,A
OpenThinker2-7B,test/geometry/547.json,Geometry,general,A,1.0,1.0,A,0.6,B,0.95,A,0.9,B
OpenThinker2-7B,test/geometry/229.json,Geometry,general,Tie,0.5,0.5,B,0.65,B,0.75,A,0.8,A
OpenThinker2-7B,test/geometry/254.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/geometry/473.json,Geometry,general,B,0.0,0.95,B,0.6,A,0.98,A,1.0,B
OpenThinker2-7B,test/geometry/347.json,Geometry,general,A,1.0,1.0,B,0.85,A,0.95,A,0.9,A
OpenThinker2-7B,test/geometry/483.json,Geometry,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,A
OpenThinker2-7B,test/geometry/826.json,Geometry,general,Tie,0.5,0.5,B,0.55,B,0.9,B,0.85,A
OpenThinker2-7B,test/geometry/226.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.95,B,0.9,B
OpenThinker2-7B,test/geometry/686.json,Geometry,general,B,1.0,0.95,B,0.75,B,0.95,B,0.95,A
OpenThinker2-7B,test/geometry/1097.json,Geometry,general,A,0.0,1.0,A,0.85,B,0.98,,,A
OpenThinker2-7B,test/geometry/965.json,Geometry,general,B,1.0,1.0,B,0.95,B,0.92,B,0.95,A
OpenThinker2-7B,test/geometry/711.json,Geometry,general,Tie,0.5,0.5,B,0.55,B,0.95,A,0.95,A
OpenThinker2-7B,test/geometry/1108.json,Geometry,general,B,1.0,1.0,B,0.95,B,0.98,,,A
OpenThinker2-7B,test/geometry/947.json,Geometry,general,Tie,0.5,0.5,B,0.9,B,0.95,,,B
OpenThinker2-7B,test/geometry/465.json,Geometry,general,B,1.0,0.9,B,0.85,A,0.7,B,0.7,B
OpenThinker2-7B,test/geometry/73.json,Geometry,general,A,1.0,1.0,A,0.75,A,0.95,A,0.95,A
OpenThinker2-7B,test/geometry/106.json,Geometry,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/geometry/846.json,Geometry,general,B,0.0,1.0,A,0.95,A,0.95,,,A
OpenThinker2-7B,test/geometry/538.json,Geometry,general,Tie,0.5,0.5,A,0.9,A,0.98,A,1.0,B
OpenThinker2-7B,test/geometry/795.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.95,,,A
OpenThinker2-7B,test/geometry/817.json,Geometry,general,Tie,0.5,0.5,B,0.6,B,0.85,B,0.7,A
OpenThinker2-7B,test/geometry/843.json,Geometry,general,B,0.0,1.0,B,0.9,A,0.98,A,0.95,B
OpenThinker2-7B,test/geometry/477.json,Geometry,general,B,1.0,0.95,A,0.65,B,0.95,B,0.9,B
OpenThinker2-7B,test/geometry/561.json,Geometry,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
OpenThinker2-7B,test/geometry/615.json,Geometry,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/counting_and_probability/525.json,Counting & Probability,general,B,1.0,1.0,B,0.95,B,0.95,B,0.95,B
OpenThinker2-7B,test/counting_and_probability/666.json,Counting & Probability,general,A,0.0,1.0,B,0.85,B,0.95,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/134.json,Counting & Probability,general,A,1.0,0.99,A,0.85,B,0.95,A,0.95,A
OpenThinker2-7B,test/counting_and_probability/119.json,Counting & Probability,general,A,1.0,1.0,B,0.55,A,0.98,A,0.95,A
OpenThinker2-7B,test/counting_and_probability/1114.json,Counting & Probability,general,A,1.0,1.0,A,1.0,A,0.95,,,B
OpenThinker2-7B,test/counting_and_probability/377.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.98,,,A
OpenThinker2-7B,test/counting_and_probability/23957.json,Counting & Probability,general,A,1.0,0.99,A,0.85,A,0.98,B,0.9,B
OpenThinker2-7B,test/counting_and_probability/1060.json,Counting & Probability,general,A,1.0,1.0,A,0.95,A,0.98,A,1.0,B
OpenThinker2-7B,test/counting_and_probability/430.json,Counting & Probability,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
OpenThinker2-7B,test/counting_and_probability/159.json,Counting & Probability,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/230.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,,,A
OpenThinker2-7B,test/counting_and_probability/803.json,Counting & Probability,general,A,0.0,0.95,A,0.6,B,0.98,,,A
OpenThinker2-7B,test/counting_and_probability/181.json,Counting & Probability,general,A,1.0,0.9,A,0.85,B,0.85,A,0.3,A
OpenThinker2-7B,test/counting_and_probability/51.json,Counting & Probability,general,A,1.0,0.99,A,0.95,A,0.95,A,1.0,B
OpenThinker2-7B,test/counting_and_probability/508.json,Counting & Probability,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/389.json,Counting & Probability,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
OpenThinker2-7B,test/counting_and_probability/765.json,Counting & Probability,general,A,0.0,1.0,B,0.95,B,0.95,,,B
OpenThinker2-7B,test/counting_and_probability/282.json,Counting & Probability,general,Tie,0.5,0.5,B,0.85,B,0.95,B,0.95,A
OpenThinker2-7B,test/counting_and_probability/71.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/894.json,Counting & Probability,general,A,1.0,1.0,B,0.85,A,0.95,,,A
OpenThinker2-7B,test/counting_and_probability/1009.json,Counting & Probability,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/counting_and_probability/913.json,Counting & Probability,general,A,1.0,1.0,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/counting_and_probability/25149.json,Counting & Probability,general,B,1.0,0.55,A,0.85,B,0.98,,,A
OpenThinker2-7B,test/counting_and_probability/339.json,Counting & Probability,general,A,1.0,1.0,A,0.55,B,0.95,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/870.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/216.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/737.json,Counting & Probability,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/116.json,Counting & Probability,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
OpenThinker2-7B,test/counting_and_probability/238.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,,,B
OpenThinker2-7B,test/counting_and_probability/1014.json,Counting & Probability,general,Tie,0.5,0.5,A,0.8,B,0.98,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/14.json,Counting & Probability,general,B,1.0,0.95,B,0.9,B,0.95,A,0.95,A
OpenThinker2-7B,test/counting_and_probability/188.json,Counting & Probability,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
OpenThinker2-7B,test/counting_and_probability/761.json,Counting & Probability,general,A,0.0,1.0,B,0.95,B,0.98,A,0.95,A
OpenThinker2-7B,test/counting_and_probability/10.json,Counting & Probability,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
OpenThinker2-7B,test/counting_and_probability/731.json,Counting & Probability,general,A,0.0,0.5,B,0.85,B,0.95,B,0.9,B
OpenThinker2-7B,test/counting_and_probability/190.json,Counting & Probability,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
OpenThinker2-7B,test/counting_and_probability/1003.json,Counting & Probability,general,A,1.0,0.95,A,0.8,A,0.95,A,0.95,B
OpenThinker2-7B,test/counting_and_probability/199.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/807.json,Precalculus,general,B,0.0,0.95,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/927.json,Precalculus,general,A,1.0,0.95,B,0.55,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1303.json,Precalculus,general,B,0.0,0.95,A,0.65,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/990.json,Precalculus,general,A,1.0,1.0,B,0.85,A,0.98,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1199.json,Precalculus,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/779.json,Precalculus,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/285.json,Precalculus,general,B,1.0,0.95,A,0.85,B,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1105.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/675.json,Precalculus,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1146.json,Precalculus,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1313.json,Precalculus,general,B,0.0,0.9,A,0.9,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/24313.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/34.json,Precalculus,general,Tie,0.5,0.5,A,0.85,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1300.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/44.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/477.json,Precalculus,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/43.json,Precalculus,general,B,0.0,0.95,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/986.json,Precalculus,general,A,0.0,1.0,B,0.9,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/117.json,Precalculus,general,B,1.0,0.85,B,0.85,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/697.json,Precalculus,general,B,1.0,0.85,B,0.85,B,0.85,B,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/659.json,Precalculus,general,A,1.0,0.95,A,0.9,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/263.json,Precalculus,general,A,0.0,0.95,A,0.85,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/541.json,Precalculus,general,A,1.0,0.95,A,1.0,A,0.99,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/190.json,Precalculus,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/819.json,Precalculus,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1056.json,Precalculus,general,Tie,0.5,0.5,A,0.55,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/441.json,Precalculus,general,B,0.0,0.95,A,0.8,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/989.json,Precalculus,general,B,1.0,0.85,B,0.85,B,0.85,B,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/920.json,Precalculus,general,B,0.0,0.95,A,0.85,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/452.json,Precalculus,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/580.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.98,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/768.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.85,,,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1172.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1201.json,Precalculus,general,A,0.0,0.95,B,0.7,B,0.9,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/881.json,Precalculus,general,A,0.0,0.95,B,0.65,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/695.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/742.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/801.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/826.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1281.json,Precalculus,general,A,0.0,1.0,B,0.55,B,0.9,B,0.8,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/96.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1289.json,Precalculus,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/902.json,Precalculus,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1291.json,Precalculus,general,B,0.0,0.95,A,0.9,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/398.json,Precalculus,general,B,1.0,0.9,B,0.95,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/681.json,Precalculus,general,A,0.0,0.85,B,0.9,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/145.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/625.json,Precalculus,general,A,1.0,0.95,A,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1202.json,Precalculus,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1133.json,Precalculus,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/499.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/323.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/703.json,Precalculus,general,B,1.0,1.0,B,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1252.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.98,,,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/1082.json,Precalculus,general,A,0.0,1.0,A,0.85,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/precalculus/356.json,Precalculus,general,B,0.0,0.95,A,0.75,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1994.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1197.json,Intermediate Algebra,general,B,1.0,0.95,B,0.65,B,0.85,B,0.75,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/134.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1000.json,Intermediate Algebra,general,A,0.0,0.95,A,0.85,B,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/607.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1388.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/428.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1454.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.9,B,0.75,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1217.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1168.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/956.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/279.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/207.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/623.json,Intermediate Algebra,general,A,0.0,0.95,B,0.55,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/47.json,Intermediate Algebra,general,B,0.0,0.95,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1849.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/2046.json,Intermediate Algebra,general,A,0.0,1.0,B,0.9,B,0.9,A,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/662.json,Intermediate Algebra,general,A,0.0,1.0,B,0.75,B,0.85,A,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/582.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.9,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/431.json,Intermediate Algebra,general,A,0.0,0.9,B,0.6,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/558.json,Intermediate Algebra,general,B,1.0,0.85,A,0.75,B,0.85,B,0.7,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/362.json,Intermediate Algebra,general,B,1.0,1.0,B,0.95,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/515.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/894.json,Intermediate Algebra,general,A,1.0,0.85,A,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/345.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.75,B,0.85,A,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1898.json,Intermediate Algebra,general,B,1.0,0.95,A,0.9,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/232.json,Intermediate Algebra,general,B,0.0,0.95,A,0.95,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/128.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1063.json,Intermediate Algebra,general,A,1.0,1.0,B,0.6,A,0.85,A,0.7,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1126.json,Intermediate Algebra,general,B,0.0,0.95,A,0.51,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/2022.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1151.json,Intermediate Algebra,general,B,1.0,0.95,B,0.9,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,1.0,0.85,A,0.9,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/966.json,Intermediate Algebra,general,B,1.0,0.85,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/964.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.85,,,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1410.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/991.json,Intermediate Algebra,general,B,0.0,0.9,A,0.9,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/183.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1422.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/2196.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/591.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.95,B,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1555.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1510.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.7,B,0.85,B,0.85,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/102.json,Intermediate Algebra,general,B,1.0,0.9,B,0.95,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/986.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.9,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1354.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.95,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1837.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/337.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1210.json,Intermediate Algebra,general,A only,0.0,0.5,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1123.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/149.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1411.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/960.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.92,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1300.json,Intermediate Algebra,general,A,1.0,1.0,A,0.9,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/90.json,Intermediate Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/754.json,Intermediate Algebra,general,B,1.0,0.95,B,0.9,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/446.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1544.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.9,,,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1714.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/2152.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/117.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/190.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/776.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1566.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1572.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1166.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/860.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.85,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1407.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1405.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/690.json,Intermediate Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/955.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1992.json,Intermediate Algebra,general,A only,0.0,0.5,A,0.85,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1111.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1791.json,Intermediate Algebra,general,A,0.0,1.0,B,0.9,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1806.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1797.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/2146.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/2015.json,Intermediate Algebra,general,B,1.0,0.85,B,0.65,B,0.85,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/121.json,Intermediate Algebra,general,A,0.0,0.95,A,0.9,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1014.json,Intermediate Algebra,general,B,0.0,0.95,A,0.95,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1462.json,Intermediate Algebra,general,B,1.0,0.95,B,0.75,B,0.92,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/199.json,Intermediate Algebra,general,B,0.0,0.95,B,0.85,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1779.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1102.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/834.json,Intermediate Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/158.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/752.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,B,0.98,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1279.json,Intermediate Algebra,general,A,0.0,0.85,B,0.75,B,0.9,B,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1467.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/101.json,Intermediate Algebra,general,B,0.0,1.0,A,0.85,A,0.92,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1365.json,Intermediate Algebra,general,A,0.0,0.85,B,0.9,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1350.json,Intermediate Algebra,general,B,0.0,0.85,A,0.9,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1930.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1981.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1232.json,Intermediate Algebra,general,B,0.0,1.0,A,0.95,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/intermediate_algebra/1508.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2584.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1349.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2036.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1098.json,Algebra,general,A,1.0,0.9,A,1.0,A,0.98,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1837.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.9,,,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2193.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2427.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1072.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/24.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2214.json,Algebra,general,B,1.0,0.95,B,0.9,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/305.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1265.json,Algebra,general,A,1.0,0.85,A,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/187.json,Algebra,general,B,1.0,0.95,B,0.8,B,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/769.json,Algebra,general,B,0.0,0.85,A,0.6,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/722.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2046.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2253.json,Algebra,general,A,1.0,0.95,B,0.92,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1004.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1035.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2700.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.92,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/893.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/567.json,Algebra,general,B,0.0,1.0,A,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/892.json,Algebra,general,A,1.0,0.85,B,0.9,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2023.json,Algebra,general,A,0.0,0.95,B,0.75,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/873.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2058.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2593.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2157.json,Algebra,general,A,1.0,0.95,A,0.95,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2251.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1332.json,Algebra,general,A,1.0,0.85,B,0.85,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/972.json,Algebra,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2232.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/661.json,Algebra,general,B,1.0,0.85,B,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/246.json,Algebra,general,B,0.0,0.95,A,0.9,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1519.json,Algebra,general,A,1.0,0.95,A,0.95,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/988.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2570.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/621.json,Algebra,general,A,1.0,0.95,A,1.0,A,1.0,,,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1255.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2517.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/478.json,Algebra,general,B,0.0,0.95,A,0.55,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/297.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/841.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/686.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/351.json,Algebra,general,B,1.0,0.9,B,0.95,B,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1275.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1082.json,Algebra,general,B,1.0,0.95,A,0.75,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1214.json,Algebra,general,A,1.0,0.9,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2199.json,Algebra,general,A,1.0,0.95,A,0.85,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/733.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/109.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1937.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/291.json,Algebra,general,A,1.0,0.85,A,0.9,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2102.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/907.json,Algebra,general,A,1.0,1.0,A,1.0,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/864.json,Algebra,general,A,0.0,1.0,A,0.85,B,0.98,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2159.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.98,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1578.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,B,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/975.json,Algebra,general,A,0.0,0.95,A,0.85,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1143.json,Algebra,general,B,0.0,0.95,B,0.95,A,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2626.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1787.json,Algebra,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1934.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2064.json,Algebra,general,B,0.0,0.95,A,0.9,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/694.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/524.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2551.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/346.json,Algebra,general,A,1.0,0.9,A,0.9,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1282.json,Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1184.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/634.json,Algebra,general,A,0.0,0.95,A,0.95,B,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2486.json,Algebra,general,A,1.0,1.0,A,0.95,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2257.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1842.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/791.json,Algebra,general,B,0.0,0.95,B,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/276.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2735.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/425.json,Algebra,general,A or B,0.0,0.5,B,0.85,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1936.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2176.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/509.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1457.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2592.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/858.json,Algebra,general,B,0.0,0.85,A,0.95,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1529.json,Algebra,general,A,0.0,0.95,B,0.51,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1338.json,Algebra,general,Tie,0.5,0.5,B,0.9,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1547.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/529.json,Algebra,general,B,0.0,0.95,B,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1078.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/251.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1199.json,Algebra,general,Tie,0.5,0.5,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2264.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1303.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/101.json,Algebra,general,A,0.0,0.95,A,0.9,B,0.95,B,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/170.json,Algebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/849.json,Algebra,general,A,1.0,0.95,B,0.51,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1031.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/853.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2277.json,Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/518.json,Algebra,general,A,1.0,0.95,A,0.9,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/114.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.9,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1960.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2680.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2391.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/776.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1796.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1339.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2743.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2043.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1553.json,Algebra,general,B,0.0,0.9,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2080.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1343.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/algebra/668.json,Algebra,general,B,0.0,1.0,A,1.0,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2430.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2789.json,Algebra,general,B,0.0,0.97,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1814.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2476.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2780.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/824.json,Algebra,general,B,0.0,0.95,B,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/1425.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/224.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/435.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2470.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/algebra/2779.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/572.json,Number Theory,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/515.json,Number Theory,general,B,0.0,0.95,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1032.json,Number Theory,general,A,1.0,0.95,A,0.98,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/737.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/864.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/627.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,B,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/45.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1055.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/46.json,Number Theory,general,A,1.0,0.95,A,0.6,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/516.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/357.json,Number Theory,general,A,1.0,1.0,A,1.0,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/914.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/847.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/753.json,Number Theory,general,B,0.0,0.9,A,1.0,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1257.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/156.json,Number Theory,general,A,0.0,1.0,B,0.9,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/612.json,Number Theory,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/931.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/521.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/598.json,Number Theory,general,B,0.0,0.95,A,1.0,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/978.json,Number Theory,general,B,0.0,0.95,B,0.9,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/838.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/149.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1201.json,Number Theory,general,B,0.0,0.95,A,0.9,A,0.98,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/234.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/417.json,Number Theory,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/89.json,Number Theory,general,B,0.0,0.85,B,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/183.json,Number Theory,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1065.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/466.json,Number Theory,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/634.json,Number Theory,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/533.json,Number Theory,general,B,0.0,0.95,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/691.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1287.json,Number Theory,general,A,0.0,1.0,B,0.55,B,0.98,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/631.json,Number Theory,general,A,0.0,0.95,B,1.0,B,0.98,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/488.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1172.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/203.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/911.json,Number Theory,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/483.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/368.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/686.json,Number Theory,general,A,0.0,0.95,B,0.65,B,0.98,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/820.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/109.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/427.json,Number Theory,general,B,0.0,0.95,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1185.json,Number Theory,general,A,0.0,0.9,B,0.85,B,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/928.json,Number Theory,general,A,1.0,0.85,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/132.json,Number Theory,general,B,0.0,0.95,B,0.55,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/769.json,Number Theory,general,A only,0.0,0.5,A,1.0,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1002.json,Number Theory,general,A,0.0,0.95,A,0.9,B,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/410.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/255.json,Number Theory,general,A,0.0,0.95,A,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1000.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/13.json,Number Theory,general,A,1.0,0.95,A,0.95,B,0.98,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/459.json,Number Theory,general,A,0.0,0.95,A,0.95,B,0.98,,,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/342.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/679.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/72.json,Number Theory,general,A,1.0,0.85,A,0.85,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/22.json,Number Theory,general,B,0.0,0.95,A,1.0,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1128.json,Number Theory,general,B,0.0,0.95,A,0.6,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/1090.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.99,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/number_theory/239.json,Number Theory,general,A,1.0,0.85,A,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1622.json,Prealgebra,general,A,1.0,0.95,A,0.55,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1139.json,Prealgebra,general,Tie,0.5,0.5,A,0.65,A,0.85,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1840.json,Prealgebra,general,A,1.0,0.95,A,1.0,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1302.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/930.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1558.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1388.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/951.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/572.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1247.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1747.json,Prealgebra,general,A,1.0,0.95,B,0.95,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1233.json,Prealgebra,general,A,1.0,0.95,A,0.95,B,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/192.json,Prealgebra,general,A,1.0,0.95,A,0.95,B,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/307.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1761.json,Prealgebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1646.json,Prealgebra,general,B,1.0,0.95,B,0.75,B,0.85,A,0.7,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/105.json,Prealgebra,general,A,1.0,0.85,A,0.75,B,0.85,A,0.7,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1924.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1804.json,Prealgebra,general,B,0.0,0.95,A,1.0,A,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1733.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/505.json,Prealgebra,general,B,0.0,0.95,A,0.55,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1686.json,Prealgebra,general,B,1.0,0.95,B,0.55,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1807.json,Prealgebra,general,A,0.0,0.95,B,0.8,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1297.json,Prealgebra,general,A,0.0,0.95,B,0.55,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1655.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1356.json,Prealgebra,general,A,0.0,0.95,A,0.5,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1003.json,Prealgebra,general,A,0.0,0.95,A,0.55,B,0.9,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1272.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1113.json,Prealgebra,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1908.json,Prealgebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1922.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1907.json,Prealgebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/2086.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/378.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1555.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1436.json,Prealgebra,general,B,1.0,0.95,A,1.0,B,0.95,B,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1961.json,Prealgebra,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/2057.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/153.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/874.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.85,B,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1251.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1458.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1995.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1317.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1742.json,Prealgebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/993.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1834.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1512.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/260.json,Prealgebra,general,B,0.0,0.95,B,0.9,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1787.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1044.json,Prealgebra,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/465.json,Prealgebra,general,A,0.0,1.0,B,0.9,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1423.json,Prealgebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/954.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1973.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1730.json,Prealgebra,general,A,0.0,0.9,A,0.95,B,0.98,,,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1238.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1353.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1187.json,Prealgebra,general,B,1.0,0.95,B,1.0,A,0.95,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1743.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1865.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1298.json,Prealgebra,general,A,1.0,0.95,A,0.51,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/2066.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/631.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/977.json,Prealgebra,general,A,1.0,0.95,A,0.98,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1991.json,Prealgebra,general,A,1.0,1.0,A,0.98,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1784.json,Prealgebra,general,B,0.0,0.9,A,1.0,A,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1572.json,Prealgebra,general,A,1.0,0.85,A,0.95,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/65.json,Prealgebra,general,A,1.0,0.95,A,0.8,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1227.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/2019.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1640.json,Prealgebra,general,B,0.0,1.0,A,1.0,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/2037.json,Prealgebra,general,A,0.0,1.0,A,0.85,B,0.98,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/996.json,Prealgebra,general,A,1.0,0.95,A,0.75,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/805.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/914.json,Prealgebra,general,B,0.0,0.9,B,0.6,A,0.65,A,0.8,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1114.json,Prealgebra,general,A,1.0,0.95,A,0.85,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/846.json,Prealgebra,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1930.json,Prealgebra,general,B,1.0,1.0,B,0.9,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1252.json,Prealgebra,general,A,0.0,0.85,A,0.85,B,0.98,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1203.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/prealgebra/1128.json,Prealgebra,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/248.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/434.json,Geometry,general,A,0.0,0.75,B,0.65,B,0.95,B,0.6,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/967.json,Geometry,general,A,0.0,0.85,B,0.95,B,0.98,A,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/627.json,Geometry,general,Tie,0.5,0.5,B,0.65,B,0.8,A,0.8,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/178.json,Geometry,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/456.json,Geometry,general,B,1.0,0.85,B,0.95,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/353.json,Geometry,general,A,0.0,1.0,B,0.85,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/183.json,Geometry,general,A,0.0,0.75,B,0.85,B,0.9,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/283.json,Geometry,general,A,0.0,0.95,B,0.85,A,0.9,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/1140.json,Geometry,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/172.json,Geometry,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/880.json,Geometry,general,A,0.0,0.95,A,0.65,B,0.9,B,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/802.json,Geometry,general,A,1.0,0.85,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/65.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/702.json,Geometry,general,B,1.0,1.0,B,0.85,B,0.98,B,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/221.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/547.json,Geometry,general,A,1.0,1.0,A,0.85,A,0.95,A,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/229.json,Geometry,general,A,0.0,1.0,B,0.95,B,0.98,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/254.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/473.json,Geometry,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/347.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/483.json,Geometry,general,A,1.0,0.95,B,0.9,A,0.98,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/826.json,Geometry,general,B,0.0,0.9,A,0.85,A,0.85,A,0.7,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/226.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/686.json,Geometry,general,A,1.0,0.85,A,0.9,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/1097.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/965.json,Geometry,general,A,0.0,0.85,B,0.65,B,0.9,B,0.8,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/711.json,Geometry,general,A,0.0,0.95,B,0.75,B,0.95,B,0.9,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/1108.json,Geometry,general,A,1.0,0.95,A,0.55,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/947.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/465.json,Geometry,general,A,0.0,0.95,B,0.9,B,0.95,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/73.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/106.json,Geometry,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/846.json,Geometry,general,A,1.0,1.0,A,0.9,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/538.json,Geometry,general,A,0.0,0.95,B,1.0,B,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/795.json,Geometry,general,A,1.0,1.0,A,0.8,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/817.json,Geometry,general,A,0.0,0.85,B,0.75,A,0.9,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/843.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/477.json,Geometry,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/geometry/561.json,Geometry,general,A,0.0,0.9,B,0.85,B,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/geometry/615.json,Geometry,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/525.json,Counting & Probability,general,A,0.0,0.85,B,0.85,B,0.95,B,0.8,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/666.json,Counting & Probability,general,B,0.0,0.95,A,0.85,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/134.json,Counting & Probability,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/119.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/1114.json,Counting & Probability,general,B,1.0,0.95,B,0.85,A,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/377.json,Counting & Probability,general,B,0.0,0.95,A,0.85,B,0.9,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/23957.json,Counting & Probability,general,A,1.0,1.0,A,0.85,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/1060.json,Counting & Probability,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/430.json,Counting & Probability,general,B,1.0,1.0,B,0.7,B,0.75,B,0.65,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/159.json,Counting & Probability,general,B,1.0,0.95,A,0.95,B,0.95,B,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/230.json,Counting & Probability,general,A,1.0,0.95,A,0.85,B,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/803.json,Counting & Probability,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/181.json,Counting & Probability,general,A,1.0,0.85,A,0.75,A,0.7,A,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/51.json,Counting & Probability,general,B,0.0,0.95,A,0.85,B,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/508.json,Counting & Probability,general,B,0.0,0.95,A,0.85,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/389.json,Counting & Probability,general,B,1.0,0.95,A,0.95,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/765.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/282.json,Counting & Probability,general,B,0.0,0.85,A,0.9,A,0.75,A,0.7,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/71.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/894.json,Counting & Probability,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/1009.json,Counting & Probability,general,A,0.0,1.0,B,0.9,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/913.json,Counting & Probability,general,B,0.0,0.95,B,0.95,A,0.95,,,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/25149.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/339.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/870.json,Counting & Probability,general,A,1.0,0.9,B,0.9,A,0.95,A,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/216.json,Counting & Probability,general,B,0.0,0.85,A,1.0,A,0.98,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/737.json,Counting & Probability,general,A,0.0,0.95,A,0.9,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/116.json,Counting & Probability,general,B,1.0,0.95,B,0.85,B,0.95,A,1.0,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/238.json,Counting & Probability,general,B,1.0,0.9,A,0.9,B,0.9,B,0.9,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/1014.json,Counting & Probability,general,B,0.0,0.95,A,0.85,A,0.98,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/14.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.75,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/188.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/761.json,Counting & Probability,general,B,0.0,0.85,B,0.6,A,0.95,A,1.0,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/10.json,Counting & Probability,general,A,1.0,1.0,A,0.85,A,0.95,,,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/731.json,Counting & Probability,general,A,0.0,0.85,B,0.85,B,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/190.json,Counting & Probability,general,B,0.0,0.85,A,0.95,A,0.95,B,0.95,A
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/1003.json,Counting & Probability,general,A,0.0,1.0,B,0.98,B,0.95,B,0.95,B
DeepSeek-R1-Distill-Qwen-7B,test/counting_and_probability/199.json,Counting & Probability,general,B,0.0,0.95,A,1.0,B,0.95,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/807.json,Precalculus,general,B,1.0,0.95,B,0.9,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/927.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/precalculus/1303.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Bespoke-Stratos-7B,test/precalculus/990.json,Precalculus,general,B,1.0,0.95,B,0.85,B,0.85,,,A
Bespoke-Stratos-7B,test/precalculus/1199.json,Precalculus,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/779.json,Precalculus,general,B,0.0,0.95,A,0.85,A,0.95,A,0.9,A
Bespoke-Stratos-7B,test/precalculus/285.json,Precalculus,general,A,0.0,0.95,B,0.65,B,0.85,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/1105.json,Precalculus,general,B,0.0,0.95,A,0.85,A,0.95,B,0.9,A
Bespoke-Stratos-7B,test/precalculus/675.json,Precalculus,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/1146.json,Precalculus,general,B,0.0,0.95,B,0.75,A,0.85,,,A
Bespoke-Stratos-7B,test/precalculus/1313.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/precalculus/24313.json,Precalculus,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/34.json,Precalculus,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/precalculus/1300.json,Precalculus,general,A,0.0,0.98,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/44.json,Precalculus,general,A,1.0,0.85,A,0.85,A,0.85,A,0.95,B
Bespoke-Stratos-7B,test/precalculus/477.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/43.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Bespoke-Stratos-7B,test/precalculus/986.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/117.json,Precalculus,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/precalculus/697.json,Precalculus,general,A,1.0,0.95,A,1.0,A,0.92,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/659.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/precalculus/263.json,Precalculus,general,B,0.0,0.95,B,0.6,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/precalculus/541.json,Precalculus,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/190.json,Precalculus,general,A,1.0,0.85,A,1.0,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/819.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/1056.json,Precalculus,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/precalculus/441.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/precalculus/989.json,Precalculus,general,A,0.0,0.85,A,0.6,B,0.7,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/920.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/precalculus/452.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/precalculus/580.json,Precalculus,general,B,1.0,0.95,B,0.9,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/precalculus/768.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.65,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/1172.json,Precalculus,general,B,1.0,1.0,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/precalculus/1201.json,Precalculus,general,A,1.0,0.95,B,0.7,A,0.85,A,0.6,B
Bespoke-Stratos-7B,test/precalculus/881.json,Precalculus,general,A,1.0,0.95,B,0.75,A,0.98,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/695.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/precalculus/742.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/801.json,Precalculus,general,A,0.0,0.85,B,0.75,B,0.75,B,0.75,A
Bespoke-Stratos-7B,test/precalculus/826.json,Precalculus,general,A,0.0,0.85,B,0.75,B,0.75,A,0.7,A
Bespoke-Stratos-7B,test/precalculus/1281.json,Precalculus,general,B,1.0,0.5,B,0.85,B,0.75,B,0.9,B
Bespoke-Stratos-7B,test/precalculus/96.json,Precalculus,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,B
Bespoke-Stratos-7B,test/precalculus/1289.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/902.json,Precalculus,general,B,1.0,0.85,B,0.6,B,0.95,B,0.85,A
Bespoke-Stratos-7B,test/precalculus/1291.json,Precalculus,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/precalculus/398.json,Precalculus,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,A
Bespoke-Stratos-7B,test/precalculus/681.json,Precalculus,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/145.json,Precalculus,general,B,1.0,0.95,B,0.98,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/precalculus/625.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/1202.json,Precalculus,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/precalculus/1133.json,Precalculus,general,B,1.0,0.95,B,0.85,B,0.9,B,0.7,B
Bespoke-Stratos-7B,test/precalculus/499.json,Precalculus,general,B,0.0,0.95,A,0.65,A,0.7,A,0.95,A
Bespoke-Stratos-7B,test/precalculus/323.json,Precalculus,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/703.json,Precalculus,general,A,1.0,1.0,A,1.0,A,0.95,A,0.7,A
Bespoke-Stratos-7B,test/precalculus/1252.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/precalculus/1082.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.9,B,0.95,B
Bespoke-Stratos-7B,test/precalculus/356.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1994.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1197.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.9,,,B
Bespoke-Stratos-7B,test/intermediate_algebra/134.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1000.json,Intermediate Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/intermediate_algebra/607.json,Intermediate Algebra,general,A,1.0,0.95,A,0.98,A,0.99,A,1.0,A
Bespoke-Stratos-7B,test/intermediate_algebra/1388.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/428.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1454.json,Intermediate Algebra,general,A,1.0,0.85,B,0.75,A,0.95,A,0.7,A
Bespoke-Stratos-7B,test/intermediate_algebra/1217.json,Intermediate Algebra,general,A,1.0,0.65,A,0.95,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/intermediate_algebra/1168.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/956.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.75,B,0.8,B
Bespoke-Stratos-7B,test/intermediate_algebra/1247.json,Intermediate Algebra,general,B,0.0,0.95,B,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/279.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.98,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/207.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/623.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/47.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1849.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.85,B,1.0,A
Bespoke-Stratos-7B,test/intermediate_algebra/2046.json,Intermediate Algebra,general,A,1.0,0.85,A,0.9,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/662.json,Intermediate Algebra,general,A,0.0,0.7,B,0.7,B,0.65,A,0.75,B
Bespoke-Stratos-7B,test/intermediate_algebra/582.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.9,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/431.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/558.json,Intermediate Algebra,general,A,0.0,0.85,B,0.85,A,0.85,B,0.6,B
Bespoke-Stratos-7B,test/intermediate_algebra/362.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.85,B,0.9,A
Bespoke-Stratos-7B,test/intermediate_algebra/515.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/894.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/intermediate_algebra/345.json,Intermediate Algebra,general,A,0.0,0.65,B,0.85,B,0.92,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1898.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/232.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.75,B,0.8,B
Bespoke-Stratos-7B,test/intermediate_algebra/128.json,Intermediate Algebra,general,B,0.0,0.95,B,0.85,A,0.7,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1063.json,Intermediate Algebra,general,B,1.0,0.95,B,0.75,B,0.85,,,A
Bespoke-Stratos-7B,test/intermediate_algebra/1126.json,Intermediate Algebra,general,A,1.0,0.98,B,0.9,A,0.98,A,1.0,B
Bespoke-Stratos-7B,test/intermediate_algebra/2022.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.9,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1151.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.9,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1408.json,Intermediate Algebra,general,B,0.0,0.95,A,0.65,A,0.65,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/966.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/intermediate_algebra/964.json,Intermediate Algebra,general,A,0.0,0.85,B,0.85,B,0.85,,,A
Bespoke-Stratos-7B,test/intermediate_algebra/1410.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/991.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/183.json,Intermediate Algebra,general,B,1.0,0.95,B,0.8,B,0.95,B,0.75,B
Bespoke-Stratos-7B,test/intermediate_algebra/1422.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/2196.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.7,B,0.9,B
Bespoke-Stratos-7B,test/intermediate_algebra/591.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Bespoke-Stratos-7B,test/intermediate_algebra/1555.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/intermediate_algebra/1510.json,Intermediate Algebra,general,B,1.0,0.75,B,0.6,B,0.85,B,0.9,B
Bespoke-Stratos-7B,test/intermediate_algebra/102.json,Intermediate Algebra,general,A,1.0,0.75,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/986.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1354.json,Intermediate Algebra,general,B,1.0,0.75,A,0.85,B,0.9,,,A
Bespoke-Stratos-7B,test/intermediate_algebra/1837.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/337.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1210.json,Intermediate Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1123.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/149.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1411.json,Intermediate Algebra,general,B,0.0,0.95,A,0.95,A,0.9,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/960.json,Intermediate Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1300.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/90.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,B,0.98,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/754.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/446.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1544.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.85,A,0.9,B
Bespoke-Stratos-7B,test/intermediate_algebra/1714.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/2152.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.9,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/117.json,Intermediate Algebra,general,A,1.0,0.75,A,0.85,A,0.65,A,0.9,B
Bespoke-Stratos-7B,test/intermediate_algebra/190.json,Intermediate Algebra,general,A,0.0,0.95,B,0.8,B,0.85,B,0.65,A
Bespoke-Stratos-7B,test/intermediate_algebra/776.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1566.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1572.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1166.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/intermediate_algebra/860.json,Intermediate Algebra,general,A,1.0,0.95,B,0.9,A,0.9,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1407.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1405.json,Intermediate Algebra,general,B,1.0,0.85,B,0.98,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/690.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/955.json,Intermediate Algebra,general,A,0.0,0.75,B,0.95,A,0.7,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1992.json,Intermediate Algebra,general,B,1.0,0.85,B,0.95,B,0.98,B,1.0,B
Bespoke-Stratos-7B,test/intermediate_algebra/1111.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1791.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1806.json,Intermediate Algebra,general,A,1.0,0.95,A,0.98,A,0.98,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1797.json,Intermediate Algebra,general,A,1.0,0.98,B,0.75,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/2146.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/2015.json,Intermediate Algebra,general,A,1.0,0.95,A,0.6,A,0.7,A,0.9,B
Bespoke-Stratos-7B,test/intermediate_algebra/121.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1014.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1462.json,Intermediate Algebra,general,B,1.0,0.85,B,0.65,B,0.85,B,0.65,A
Bespoke-Stratos-7B,test/intermediate_algebra/199.json,Intermediate Algebra,general,B,0.0,0.95,B,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1779.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.9,B,0.9,A
Bespoke-Stratos-7B,test/intermediate_algebra/1102.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/intermediate_algebra/834.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,A,0.95,B,1.0,A
Bespoke-Stratos-7B,test/intermediate_algebra/158.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/intermediate_algebra/752.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1279.json,Intermediate Algebra,general,A,1.0,0.75,A,0.6,B,0.85,A,0.7,B
Bespoke-Stratos-7B,test/intermediate_algebra/1467.json,Intermediate Algebra,general,A,1.0,0.85,A,0.6,B,0.95,A,0.9,A
Bespoke-Stratos-7B,test/intermediate_algebra/101.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/intermediate_algebra/1365.json,Intermediate Algebra,general,A,1.0,0.85,A,0.98,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1350.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.85,,,B
Bespoke-Stratos-7B,test/intermediate_algebra/1930.json,Intermediate Algebra,general,B,1.0,0.3,B,0.8,B,0.65,A,0.85,B
Bespoke-Stratos-7B,test/intermediate_algebra/1981.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.98,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1232.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,B,0.95,A,0.95,A
Bespoke-Stratos-7B,test/intermediate_algebra/1508.json,Intermediate Algebra,general,B,1.0,0.6,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/2584.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/1349.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/2036.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,B,0.9,B
Bespoke-Stratos-7B,test/algebra/1098.json,Algebra,general,A,1.0,0.85,A,1.0,A,1.0,A,1.0,B
Bespoke-Stratos-7B,test/algebra/1837.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2193.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/2427.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/1072.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/24.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2214.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/305.json,Algebra,general,A,1.0,0.95,A,0.8,A,0.98,A,0.95,B
Bespoke-Stratos-7B,test/algebra/1265.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/187.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.98,A,0.95,A
Bespoke-Stratos-7B,test/algebra/769.json,Algebra,general,B,1.0,0.95,B,0.98,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/722.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2046.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2253.json,Algebra,general,B,0.0,0.95,B,0.85,A,0.95,A,0.9,A
Bespoke-Stratos-7B,test/algebra/1004.json,Algebra,general,A,0.0,0.95,B,0.92,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/1035.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2700.json,Algebra,general,A,1.0,0.95,A,0.7,B,0.65,,,B
Bespoke-Stratos-7B,test/algebra/893.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/567.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.98,B,0.95,B
Bespoke-Stratos-7B,test/algebra/892.json,Algebra,general,A,0.0,0.6,B,0.95,B,0.85,B,0.9,A
Bespoke-Stratos-7B,test/algebra/2023.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/873.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,1.0,B
Bespoke-Stratos-7B,test/algebra/2058.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/2593.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/2157.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/2251.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1332.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/972.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/2232.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.98,A,0.95,B
Bespoke-Stratos-7B,test/algebra/661.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.65,A
Bespoke-Stratos-7B,test/algebra/246.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/algebra/1519.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.98,A,0.95,B
Bespoke-Stratos-7B,test/algebra/988.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,B,0.9,A
Bespoke-Stratos-7B,test/algebra/2570.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/621.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1255.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
Bespoke-Stratos-7B,test/algebra/2517.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/algebra/478.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/297.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/841.json,Algebra,general,A,0.0,0.95,B,1.0,B,0.98,B,0.95,B
Bespoke-Stratos-7B,test/algebra/686.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/351.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/1275.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/1082.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/1214.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.98,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2199.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/733.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/109.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1937.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,1.0,B
Bespoke-Stratos-7B,test/algebra/291.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2102.json,Algebra,general,B,0.0,0.95,B,0.95,A,0.95,A,0.9,A
Bespoke-Stratos-7B,test/algebra/907.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/864.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2159.json,Algebra,general,B,1.0,0.95,B,0.95,A,0.98,B,0.95,B
Bespoke-Stratos-7B,test/algebra/1578.json,Algebra,general,A,0.0,0.95,B,0.7,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/975.json,Algebra,general,B,1.0,0.95,B,0.9,A,0.95,B,0.9,A
Bespoke-Stratos-7B,test/algebra/1143.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/2626.json,Algebra,general,A,1.0,0.95,A,0.98,A,0.98,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1787.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.9,A,0.9,A
Bespoke-Stratos-7B,test/algebra/1934.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/2064.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/694.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/524.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/2551.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/346.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/1282.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/1184.json,Algebra,general,A,0.0,0.35,B,0.75,B,0.85,B,0.9,B
Bespoke-Stratos-7B,test/algebra/634.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/2486.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/2257.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1842.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Bespoke-Stratos-7B,test/algebra/791.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/276.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/2735.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/425.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/1936.json,Algebra,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2176.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.9,B,0.95,B
Bespoke-Stratos-7B,test/algebra/509.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,1.0,B
Bespoke-Stratos-7B,test/algebra/1457.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/2592.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/858.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1529.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1338.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1547.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.98,B,0.95,A
Bespoke-Stratos-7B,test/algebra/529.json,Algebra,general,A,1.0,0.95,A,0.7,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/1078.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/251.json,Algebra,general,B,1.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/1199.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2264.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1303.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/algebra/101.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/170.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/849.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1031.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/algebra/853.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.98,A,0.95,B
Bespoke-Stratos-7B,test/algebra/2277.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/518.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/114.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1960.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/algebra/2680.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2391.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/776.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.9,A,0.95,A
Bespoke-Stratos-7B,test/algebra/1796.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/1339.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/2743.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2043.json,Algebra,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/1553.json,Algebra,general,A,0.0,0.95,B,0.7,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2080.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/algebra/1343.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/668.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.9,A
Bespoke-Stratos-7B,test/algebra/2430.json,Algebra,general,A,1.0,0.95,A,0.9,B,0.95,A,1.0,B
Bespoke-Stratos-7B,test/algebra/2789.json,Algebra,general,A,1.0,1.0,B,0.95,A,0.95,A,0.9,A
Bespoke-Stratos-7B,test/algebra/1814.json,Algebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/2476.json,Algebra,general,B,1.0,0.95,B,0.9,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/2780.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/824.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
Bespoke-Stratos-7B,test/algebra/1425.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/algebra/224.json,Algebra,general,B,0.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/algebra/435.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/algebra/2470.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.98,A,0.95,B
Bespoke-Stratos-7B,test/algebra/2779.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/number_theory/572.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/515.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/number_theory/1032.json,Number Theory,general,A,1.0,0.85,A,1.0,A,0.9,A,1.0,A
Bespoke-Stratos-7B,test/number_theory/737.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/number_theory/864.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/number_theory/627.json,Number Theory,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/45.json,Number Theory,general,B,0.0,0.95,B,0.85,A,0.85,A,0.95,B
Bespoke-Stratos-7B,test/number_theory/1055.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.98,B,0.7,B
Bespoke-Stratos-7B,test/number_theory/46.json,Number Theory,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/number_theory/516.json,Number Theory,general,A,0.0,0.7,A,0.75,B,0.75,B,0.9,A
Bespoke-Stratos-7B,test/number_theory/357.json,Number Theory,general,B,1.0,0.85,B,1.0,B,0.98,B,1.0,B
Bespoke-Stratos-7B,test/number_theory/914.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/number_theory/847.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/753.json,Number Theory,general,A,1.0,1.0,A,1.0,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/1257.json,Number Theory,general,B,1.0,0.85,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/156.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/612.json,Number Theory,general,A,1.0,0.95,A,0.98,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/931.json,Number Theory,general,A,1.0,0.95,B,0.95,A,0.98,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/521.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/598.json,Number Theory,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/978.json,Number Theory,general,B,1.0,0.95,B,0.98,B,0.95,B,1.0,B
Bespoke-Stratos-7B,test/number_theory/838.json,Number Theory,general,B,1.0,0.95,B,0.95,A,0.95,B,0.9,A
Bespoke-Stratos-7B,test/number_theory/149.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/1201.json,Number Theory,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/number_theory/234.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/number_theory/417.json,Number Theory,general,A,1.0,0.95,A,0.7,A,0.95,B,0.9,A
Bespoke-Stratos-7B,test/number_theory/89.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/183.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.98,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/1065.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/number_theory/466.json,Number Theory,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/634.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/533.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Bespoke-Stratos-7B,test/number_theory/691.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/number_theory/1287.json,Number Theory,general,A,1.0,0.95,B,0.95,A,0.98,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/631.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.92,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/488.json,Number Theory,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/1172.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/number_theory/203.json,Number Theory,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/911.json,Number Theory,general,B,0.0,0.95,A,0.95,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/number_theory/483.json,Number Theory,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/number_theory/368.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/686.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/820.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/number_theory/109.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/427.json,Number Theory,general,A,0.0,1.0,B,1.0,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/number_theory/1185.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/928.json,Number Theory,general,A,0.0,0.95,B,0.6,B,0.95,A,0.8,B
Bespoke-Stratos-7B,test/number_theory/132.json,Number Theory,general,A,0.0,0.95,B,0.9,B,0.95,B,0.9,B
Bespoke-Stratos-7B,test/number_theory/769.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/number_theory/1002.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.9,A
Bespoke-Stratos-7B,test/number_theory/410.json,Number Theory,general,A,1.0,0.95,A,0.98,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/number_theory/255.json,Number Theory,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/1000.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/number_theory/13.json,Number Theory,general,A,0.0,0.95,B,0.9,A,0.95,B,1.0,A
Bespoke-Stratos-7B,test/number_theory/459.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/number_theory/342.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/number_theory/679.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,A
Bespoke-Stratos-7B,test/number_theory/72.json,Number Theory,general,A,1.0,1.0,A,0.6,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/22.json,Number Theory,general,B,0.0,0.95,A,0.9,A,0.75,A,0.7,A
Bespoke-Stratos-7B,test/number_theory/1128.json,Number Theory,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/number_theory/1090.json,Number Theory,general,A,0.0,0.85,B,0.95,A,0.8,B,0.9,B
Bespoke-Stratos-7B,test/number_theory/239.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1622.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1139.json,Prealgebra,general,B,0.0,0.65,A,0.75,B,0.75,A,0.8,A
Bespoke-Stratos-7B,test/prealgebra/1840.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1302.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/930.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.98,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1558.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,A,1.0,A
Bespoke-Stratos-7B,test/prealgebra/1388.json,Prealgebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/951.json,Prealgebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/572.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1247.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1747.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1233.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/192.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.9,A
Bespoke-Stratos-7B,test/prealgebra/307.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1761.json,Prealgebra,general,B,0.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1646.json,Prealgebra,general,B,1.0,0.65,B,0.95,B,0.95,B,0.9,B
Bespoke-Stratos-7B,test/prealgebra/105.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1924.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1804.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,A
Bespoke-Stratos-7B,test/prealgebra/1733.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/505.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1686.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1807.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1297.json,Prealgebra,general,A,0.0,0.95,A,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1655.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1356.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1003.json,Prealgebra,general,A,0.0,0.65,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1272.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1113.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1908.json,Prealgebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Bespoke-Stratos-7B,test/prealgebra/1922.json,Prealgebra,general,A,0.0,0.95,B,0.8,B,0.95,A,1.0,B
Bespoke-Stratos-7B,test/prealgebra/1907.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/2086.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/378.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1555.json,Prealgebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1436.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1961.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/2057.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/153.json,Prealgebra,general,A,0.0,0.85,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/874.json,Prealgebra,general,A,0.0,0.75,B,0.6,A,0.65,B,0.9,A
Bespoke-Stratos-7B,test/prealgebra/1251.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1458.json,Prealgebra,general,A,1.0,0.95,B,0.8,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1995.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/prealgebra/1317.json,Prealgebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1742.json,Prealgebra,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/993.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1834.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1512.json,Prealgebra,general,A,1.0,0.85,A,1.0,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/prealgebra/260.json,Prealgebra,general,B,0.0,0.55,A,0.98,A,0.85,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1787.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1044.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/465.json,Prealgebra,general,A,1.0,0.85,A,0.65,A,0.75,B,0.9,A
Bespoke-Stratos-7B,test/prealgebra/1423.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/954.json,Prealgebra,general,B,0.0,0.95,B,0.85,A,0.92,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1973.json,Prealgebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1730.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1238.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1353.json,Prealgebra,general,A,1.0,1.0,A,0.8,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/prealgebra/1187.json,Prealgebra,general,B,1.0,0.75,B,0.75,B,0.6,A,0.7,A
Bespoke-Stratos-7B,test/prealgebra/1743.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.9,A
Bespoke-Stratos-7B,test/prealgebra/1865.json,Prealgebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1298.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/2066.json,Prealgebra,general,A,1.0,0.85,A,0.95,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/prealgebra/631.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/977.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1991.json,Prealgebra,general,A,1.0,0.95,B,0.92,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1784.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1572.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/65.json,Prealgebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1227.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/prealgebra/2019.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1640.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/2037.json,Prealgebra,general,A,0.0,0.95,B,0.8,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/996.json,Prealgebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/805.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/914.json,Prealgebra,general,B,1.0,0.85,B,0.85,B,0.85,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1114.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/prealgebra/846.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1930.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/prealgebra/1252.json,Prealgebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1203.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/prealgebra/1128.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,B
Bespoke-Stratos-7B,test/geometry/248.json,Geometry,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/geometry/434.json,Geometry,general,B,0.0,0.75,A,0.75,A,0.6,A,0.7,B
Bespoke-Stratos-7B,test/geometry/967.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/geometry/627.json,Geometry,general,A,0.0,0.75,B,0.92,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/geometry/178.json,Geometry,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/456.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/353.json,Geometry,general,B,1.0,0.95,B,0.95,A,0.95,B,0.9,A
Bespoke-Stratos-7B,test/geometry/183.json,Geometry,general,B,1.0,0.65,B,0.85,B,0.75,B,0.9,A
Bespoke-Stratos-7B,test/geometry/283.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/geometry/1140.json,Geometry,general,A,1.0,0.85,A,0.95,A,0.92,B,0.9,A
Bespoke-Stratos-7B,test/geometry/172.json,Geometry,general,B,0.0,0.95,A,1.0,A,0.9,A,0.95,B
Bespoke-Stratos-7B,test/geometry/880.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.7,,,A
Bespoke-Stratos-7B,test/geometry/802.json,Geometry,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/65.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Bespoke-Stratos-7B,test/geometry/702.json,Geometry,general,A,1.0,0.95,B,0.85,A,0.7,A,0.6,B
Bespoke-Stratos-7B,test/geometry/221.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/geometry/547.json,Geometry,general,B,1.0,0.6,B,0.75,B,0.92,B,0.75,B
Bespoke-Stratos-7B,test/geometry/229.json,Geometry,general,A,1.0,0.95,A,0.98,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/254.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/473.json,Geometry,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/347.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
Bespoke-Stratos-7B,test/geometry/483.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/826.json,Geometry,general,A,1.0,0.85,A,0.95,B,0.6,A,0.7,A
Bespoke-Stratos-7B,test/geometry/226.json,Geometry,general,B,1.0,0.95,B,1.0,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/geometry/686.json,Geometry,general,B,1.0,0.85,B,0.9,B,0.85,B,0.9,B
Bespoke-Stratos-7B,test/geometry/1097.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,0.9,B
Bespoke-Stratos-7B,test/geometry/965.json,Geometry,general,A,1.0,0.75,A,0.7,A,0.6,A,0.7,A
Bespoke-Stratos-7B,test/geometry/711.json,Geometry,general,B,1.0,0.95,B,0.75,B,0.85,B,0.75,B
Bespoke-Stratos-7B,test/geometry/1108.json,Geometry,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/947.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Bespoke-Stratos-7B,test/geometry/465.json,Geometry,general,B,1.0,0.65,B,0.6,B,0.6,B,0.6,B
Bespoke-Stratos-7B,test/geometry/73.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/106.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,B
Bespoke-Stratos-7B,test/geometry/846.json,Geometry,general,A,1.0,0.95,A,0.75,A,0.5,,,A
Bespoke-Stratos-7B,test/geometry/538.json,Geometry,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/795.json,Geometry,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/geometry/817.json,Geometry,general,A,1.0,1.0,A,0.7,A,0.9,,,A
Bespoke-Stratos-7B,test/geometry/843.json,Geometry,general,A,1.0,1.0,B,0.85,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/geometry/477.json,Geometry,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/geometry/561.json,Geometry,general,A,1.0,0.95,A,0.98,A,0.95,A,1.0,B
Bespoke-Stratos-7B,test/geometry/615.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/525.json,Counting & Probability,general,A,0.0,0.6,B,0.85,B,0.75,B,0.8,B
Bespoke-Stratos-7B,test/counting_and_probability/666.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.98,B,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/134.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/119.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/1114.json,Counting & Probability,general,A,1.0,0.98,A,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/377.json,Counting & Probability,general,B,0.0,0.95,A,0.92,A,0.98,A,1.0,A
Bespoke-Stratos-7B,test/counting_and_probability/23957.json,Counting & Probability,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,B
Bespoke-Stratos-7B,test/counting_and_probability/1060.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/430.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.85,,,A
Bespoke-Stratos-7B,test/counting_and_probability/159.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/230.json,Counting & Probability,general,B,0.0,0.95,B,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/803.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/181.json,Counting & Probability,general,B,1.0,0.95,B,0.75,B,0.95,B,0.9,A
Bespoke-Stratos-7B,test/counting_and_probability/51.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/508.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/389.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/765.json,Counting & Probability,general,A,1.0,0.95,A,1.0,A,0.99,A,1.0,A
Bespoke-Stratos-7B,test/counting_and_probability/282.json,Counting & Probability,general,B,0.0,0.85,A,0.85,A,0.85,A,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/71.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/894.json,Counting & Probability,general,A,0.0,1.0,B,0.85,A,0.95,B,1.0,A
Bespoke-Stratos-7B,test/counting_and_probability/1009.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/913.json,Counting & Probability,general,A,0.0,0.95,B,0.98,B,0.98,B,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/25149.json,Counting & Probability,general,A,1.0,0.95,A,0.75,A,0.95,B,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/339.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/870.json,Counting & Probability,general,A,1.0,0.85,A,0.75,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/216.json,Counting & Probability,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/737.json,Counting & Probability,general,B,1.0,0.95,B,0.9,A,0.95,B,0.9,A
Bespoke-Stratos-7B,test/counting_and_probability/116.json,Counting & Probability,general,B,1.0,0.9,B,0.85,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/238.json,Counting & Probability,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/1014.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.98,A,1.0,B
Bespoke-Stratos-7B,test/counting_and_probability/14.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Bespoke-Stratos-7B,test/counting_and_probability/188.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.9,A,0.9,A
Bespoke-Stratos-7B,test/counting_and_probability/761.json,Counting & Probability,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/10.json,Counting & Probability,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/731.json,Counting & Probability,general,B,1.0,0.95,B,0.75,B,0.85,B,0.75,A
Bespoke-Stratos-7B,test/counting_and_probability/190.json,Counting & Probability,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,B
Bespoke-Stratos-7B,test/counting_and_probability/1003.json,Counting & Probability,general,B,1.0,0.95,B,0.95,B,0.98,B,0.95,A
Bespoke-Stratos-7B,test/counting_and_probability/199.json,Counting & Probability,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,A
JiuZhang3.0-7B,test/precalculus/807.json,Precalculus,general,B,0.0,1.0,A,0.95,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/precalculus/927.json,Precalculus,general,A,1.0,1.0,A,0.6,A,1.0,,,A
JiuZhang3.0-7B,test/precalculus/1303.json,Precalculus,general,B,0.0,1.0,A,0.51,A,0.5,A,0.3,B
JiuZhang3.0-7B,test/precalculus/990.json,Precalculus,general,Tie,0.5,0.5,B,0.7,A,0.85,,,B
JiuZhang3.0-7B,test/precalculus/1199.json,Precalculus,general,A,1.0,1.0,B,0.75,A,0.9,,,A
JiuZhang3.0-7B,test/precalculus/779.json,Precalculus,general,Tie,0.5,0.5,B,1.0,B,0.5,B,1.0,B
JiuZhang3.0-7B,test/precalculus/285.json,Precalculus,general,Tie,0.5,0.5,A,0.65,B,0.9,A,0.6,A
JiuZhang3.0-7B,test/precalculus/1105.json,Precalculus,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/precalculus/675.json,Precalculus,general,B,1.0,1.0,B,0.9,B,0.9,B,0.6,A
JiuZhang3.0-7B,test/precalculus/1146.json,Precalculus,general,Tie,0.5,0.5,A,0.75,A,0.9,A,0.7,B
JiuZhang3.0-7B,test/precalculus/1313.json,Precalculus,general,A,0.0,0.5,B,0.6,B,0.9,B,0.85,A
JiuZhang3.0-7B,test/precalculus/24313.json,Precalculus,general,B,0.0,1.0,A,0.95,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/precalculus/34.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
JiuZhang3.0-7B,test/precalculus/1300.json,Precalculus,general,A,1.0,0.5,A,0.95,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/precalculus/44.json,Precalculus,general,B,1.0,1.0,A,0.6,B,0.85,B,0.7,B
JiuZhang3.0-7B,test/precalculus/477.json,Precalculus,general,Tie,0.5,0.5,B,0.85,A,0.95,,,B
JiuZhang3.0-7B,test/precalculus/43.json,Precalculus,general,Tie,0.5,0.5,B,0.9,B,0.95,A,0.95,B
JiuZhang3.0-7B,test/precalculus/986.json,Precalculus,general,B,1.0,0.5,B,0.6,B,0.9,B,0.7,B
JiuZhang3.0-7B,test/precalculus/117.json,Precalculus,general,B,0.0,0.5,B,0.65,A,0.7,A,0.6,B
JiuZhang3.0-7B,test/precalculus/697.json,Precalculus,general,Tie,0.5,0.5,B,0.6,A,0.5,B,0.65,B
JiuZhang3.0-7B,test/precalculus/659.json,Precalculus,general,A,1.0,1.0,A,0.7,A,0.85,,,A
JiuZhang3.0-7B,test/precalculus/263.json,Precalculus,general,B,1.0,0.95,B,0.85,B,0.95,B,0.8,B
JiuZhang3.0-7B,test/precalculus/541.json,Precalculus,general,Tie,0.5,0.5,A,0.85,A,0.9,,,A
JiuZhang3.0-7B,test/precalculus/190.json,Precalculus,general,Tie,0.5,0.5,B,0.85,B,0.5,A,0.95,A
JiuZhang3.0-7B,test/precalculus/819.json,Precalculus,general,Tie,0.5,0.5,B,0.75,B,0.95,B,1.0,B
JiuZhang3.0-7B,test/precalculus/1056.json,Precalculus,general,Tie,0.5,0.5,A,0.85,A,0.7,A,0.7,B
JiuZhang3.0-7B,test/precalculus/441.json,Precalculus,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/precalculus/989.json,Precalculus,general,A,0.0,0.5,B,0.6,B,0.85,A,0.7,B
JiuZhang3.0-7B,test/precalculus/920.json,Precalculus,general,A,1.0,1.0,A,1.0,A,0.95,A,0.9,A
JiuZhang3.0-7B,test/precalculus/452.json,Precalculus,general,Tie,0.5,0.5,B,0.65,B,0.85,B,0.8,A
JiuZhang3.0-7B,test/precalculus/580.json,Precalculus,general,Tie,0.5,0.5,B,0.95,B,0.95,,,A
JiuZhang3.0-7B,test/precalculus/768.json,Precalculus,general,A,0.0,0.9,A,0.7,B,0.95,,,A
JiuZhang3.0-7B,test/precalculus/1172.json,Precalculus,general,Tie,0.5,0.5,B,0.65,B,0.85,,,B
JiuZhang3.0-7B,test/precalculus/1201.json,Precalculus,general,B,1.0,1.0,B,0.6,B,0.9,B,0.3,B
JiuZhang3.0-7B,test/precalculus/881.json,Precalculus,general,A,0.0,0.5,A,0.7,B,0.9,,,A
JiuZhang3.0-7B,test/precalculus/695.json,Precalculus,general,Tie,0.5,0.5,A,0.7,B,0.95,B,0.9,A
JiuZhang3.0-7B,test/precalculus/742.json,Precalculus,general,Tie,0.5,0.5,B,0.6,B,0.9,B,0.6,B
JiuZhang3.0-7B,test/precalculus/801.json,Precalculus,general,Tie,0.5,0.5,B,0.6,B,0.85,A,0.7,A
JiuZhang3.0-7B,test/precalculus/826.json,Precalculus,general,B,1.0,0.5,A,0.85,B,0.5,B,0.7,A
JiuZhang3.0-7B,test/precalculus/1281.json,Precalculus,general,B,0.0,0.5,A,0.7,A,0.5,,,A
JiuZhang3.0-7B,test/precalculus/96.json,Precalculus,general,B,1.0,0.9,B,0.95,B,0.98,B,0.9,B
JiuZhang3.0-7B,test/precalculus/1289.json,Precalculus,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,B
JiuZhang3.0-7B,test/precalculus/902.json,Precalculus,general,B,0.0,1.0,A,0.6,A,0.5,,,B
JiuZhang3.0-7B,test/precalculus/1291.json,Precalculus,general,Tie,0.5,0.5,A,0.65,B,0.7,B,0.7,B
JiuZhang3.0-7B,test/precalculus/398.json,Precalculus,general,A,1.0,0.5,A,0.6,A,0.6,A,0.6,A
JiuZhang3.0-7B,test/precalculus/681.json,Precalculus,general,A,1.0,0.9,A,0.85,A,0.4,A,0.6,A
JiuZhang3.0-7B,test/precalculus/145.json,Precalculus,general,Tie,0.5,0.5,B,0.65,A,0.5,B,0.6,B
JiuZhang3.0-7B,test/precalculus/625.json,Precalculus,general,B,0.0,1.0,A,0.9,A,0.95,B,0.9,A
JiuZhang3.0-7B,test/precalculus/1202.json,Precalculus,general,B,1.0,1.0,B,0.7,A,0.5,,,A
JiuZhang3.0-7B,test/precalculus/1133.json,Precalculus,general,Tie,0.5,0.5,B,0.6,B,0.75,B,0.8,A
JiuZhang3.0-7B,test/precalculus/499.json,Precalculus,general,Tie,0.5,0.5,B,0.85,B,0.9,B,0.7,A
JiuZhang3.0-7B,test/precalculus/323.json,Precalculus,general,Tie,0.5,0.5,B,0.75,B,0.7,B,0.5,A
JiuZhang3.0-7B,test/precalculus/703.json,Precalculus,general,Tie,0.5,0.5,B,0.65,B,0.95,B,0.6,A
JiuZhang3.0-7B,test/precalculus/1252.json,Precalculus,general,A,1.0,1.0,A,1.0,A,0.9,A,0.6,A
JiuZhang3.0-7B,test/precalculus/1082.json,Precalculus,general,A,1.0,0.75,A,0.6,A,0.5,A,0.6,B
JiuZhang3.0-7B,test/precalculus/356.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/intermediate_algebra/1994.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,A,0.5,A,0.4,A
JiuZhang3.0-7B,test/intermediate_algebra/1197.json,Intermediate Algebra,general,A,0.0,0.8,B,0.85,B,0.85,B,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/134.json,Intermediate Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/intermediate_algebra/1000.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.9,A,0.4,B,0.95,B
JiuZhang3.0-7B,test/intermediate_algebra/607.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.6,A,0.7,,,B
JiuZhang3.0-7B,test/intermediate_algebra/1388.json,Intermediate Algebra,general,A,0.0,0.5,B,0.85,B,0.85,,,B
JiuZhang3.0-7B,test/intermediate_algebra/428.json,Intermediate Algebra,general,A,1.0,1.0,A,0.7,A,0.7,A,0.7,A
JiuZhang3.0-7B,test/intermediate_algebra/1454.json,Intermediate Algebra,general,A,1.0,0.5,A,0.7,A,0.5,A,0.6,A
JiuZhang3.0-7B,test/intermediate_algebra/1217.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,B,0.99,B,0.95,A
JiuZhang3.0-7B,test/intermediate_algebra/1168.json,Intermediate Algebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/intermediate_algebra/956.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.7,B,0.75,A,0.6,A
JiuZhang3.0-7B,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,1.0,A,0.9,B
JiuZhang3.0-7B,test/intermediate_algebra/279.json,Intermediate Algebra,general,Tie,0.5,0.5,B,1.0,B,0.99,B,1.0,A
JiuZhang3.0-7B,test/intermediate_algebra/207.json,Intermediate Algebra,general,B,1.0,0.5,B,0.75,A,0.9,B,0.9,A
JiuZhang3.0-7B,test/intermediate_algebra/623.json,Intermediate Algebra,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/intermediate_algebra/47.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.0,A,0.85,A,0.7,A
JiuZhang3.0-7B,test/intermediate_algebra/1849.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.7,A,0.85,A,0.9,A
JiuZhang3.0-7B,test/intermediate_algebra/2046.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.75,B,0.85,A,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/662.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.6,B,0.6,A,0.6,A
JiuZhang3.0-7B,test/intermediate_algebra/582.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.7,,,A
JiuZhang3.0-7B,test/intermediate_algebra/431.json,Intermediate Algebra,general,B,1.0,0.8,B,0.8,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/intermediate_algebra/558.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.5,A,0.5,A,0.4,B
JiuZhang3.0-7B,test/intermediate_algebra/362.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.65,B,0.7,,,B
JiuZhang3.0-7B,test/intermediate_algebra/515.json,Intermediate Algebra,general,A,1.0,1.0,A,0.51,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/intermediate_algebra/894.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.5,B,0.5,B,0.6,B
JiuZhang3.0-7B,test/intermediate_algebra/345.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.6,A,0.6,B
JiuZhang3.0-7B,test/intermediate_algebra/1898.json,Intermediate Algebra,general,B,0.0,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/intermediate_algebra/232.json,Intermediate Algebra,general,A,0.0,0.8,A,0.6,B,0.7,B,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/128.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.7,A,0.7,A,0.7,A
JiuZhang3.0-7B,test/intermediate_algebra/1063.json,Intermediate Algebra,general,A,0.0,1.0,A,0.6,B,0.85,,,B
JiuZhang3.0-7B,test/intermediate_algebra/1126.json,Intermediate Algebra,general,Tie,0.5,0.5,B,1.0,B,0.7,B,0.3,B
JiuZhang3.0-7B,test/intermediate_algebra/2022.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.9,A,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/1151.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.65,B,0.7,A,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,1.0,1.0,A,0.9,A,0.5,A,0.4,B
JiuZhang3.0-7B,test/intermediate_algebra/966.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.75,B,0.9,,,A
JiuZhang3.0-7B,test/intermediate_algebra/964.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.65,B,0.6,B,0.65,B
JiuZhang3.0-7B,test/intermediate_algebra/1410.json,Intermediate Algebra,general,A,0.0,1.0,A,0.9,B,0.95,B,0.9,B
JiuZhang3.0-7B,test/intermediate_algebra/991.json,Intermediate Algebra,general,A,0.0,0.5,B,0.75,B,0.9,B,0.9,A
JiuZhang3.0-7B,test/intermediate_algebra/183.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.65,A,0.5,A,0.7,A
JiuZhang3.0-7B,test/intermediate_algebra/1422.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.9,A,0.8,B
JiuZhang3.0-7B,test/intermediate_algebra/2196.json,Intermediate Algebra,general,B,0.0,1.0,A,0.95,B,0.5,A,0.7,A
JiuZhang3.0-7B,test/intermediate_algebra/591.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.9,,,A
JiuZhang3.0-7B,test/intermediate_algebra/1555.json,Intermediate Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/intermediate_algebra/1510.json,Intermediate Algebra,general,B,1.0,1.0,B,0.3,B,0.6,A,0.3,B
JiuZhang3.0-7B,test/intermediate_algebra/102.json,Intermediate Algebra,general,B,1.0,0.5,B,0.65,B,0.6,,,A
JiuZhang3.0-7B,test/intermediate_algebra/986.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.65,B,0.85,B,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/1354.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.85,A,0.65,B
JiuZhang3.0-7B,test/intermediate_algebra/1837.json,Intermediate Algebra,general,A,0.0,0.9,B,0.7,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/intermediate_algebra/337.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,B,0.95,B,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/1210.json,Intermediate Algebra,general,A,1.0,1.0,B,0.7,A,0.7,A,0.6,A
JiuZhang3.0-7B,test/intermediate_algebra/1123.json,Intermediate Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/intermediate_algebra/149.json,Intermediate Algebra,general,A,0.0,1.0,B,0.9,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/intermediate_algebra/1411.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.1,B,1.0,B
JiuZhang3.0-7B,test/intermediate_algebra/960.json,Intermediate Algebra,general,A,0.0,1.0,B,0.7,B,0.9,B,0.6,A
JiuZhang3.0-7B,test/intermediate_algebra/1300.json,Intermediate Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/intermediate_algebra/90.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.5,A,0.75,A,0.75,A
JiuZhang3.0-7B,test/intermediate_algebra/754.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.8,B,0.95,,,B
JiuZhang3.0-7B,test/intermediate_algebra/446.json,Intermediate Algebra,general,Tie,0.5,0.5,B,1.0,B,0.95,B,1.0,A
JiuZhang3.0-7B,test/intermediate_algebra/1544.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.7,A,0.6,,,B
JiuZhang3.0-7B,test/intermediate_algebra/1714.json,Intermediate Algebra,general,A,0.0,1.0,B,0.6,A,0.7,B,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/2152.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,A,0.5,A,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/117.json,Intermediate Algebra,general,B,1.0,0.9,B,0.85,B,0.85,,,A
JiuZhang3.0-7B,test/intermediate_algebra/190.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.65,A,0.6,A,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/776.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.65,A,0.5,B,0.3,A
JiuZhang3.0-7B,test/intermediate_algebra/1566.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.7,B,0.85,,,A
JiuZhang3.0-7B,test/intermediate_algebra/1572.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.65,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/intermediate_algebra/1166.json,Intermediate Algebra,general,B,0.0,0.5,A,0.85,A,0.7,,,A
JiuZhang3.0-7B,test/intermediate_algebra/860.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.95,B,0.9,B,0.9,B
JiuZhang3.0-7B,test/intermediate_algebra/1407.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.55,A,0.7,A,0.8,A
JiuZhang3.0-7B,test/intermediate_algebra/1405.json,Intermediate Algebra,general,A,1.0,1.0,B,0.6,A,0.6,A,0.3,B
JiuZhang3.0-7B,test/intermediate_algebra/690.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.75,B,0.7,,,A
JiuZhang3.0-7B,test/intermediate_algebra/955.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.85,B,0.4,A
JiuZhang3.0-7B,test/intermediate_algebra/1992.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.9,,,A
JiuZhang3.0-7B,test/intermediate_algebra/1111.json,Intermediate Algebra,general,B,0.0,0.5,A,0.65,A,0.5,,,A
JiuZhang3.0-7B,test/intermediate_algebra/1791.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.7,B,0.95,,,A
JiuZhang3.0-7B,test/intermediate_algebra/1806.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.65,B,0.5,A,0.6,A
JiuZhang3.0-7B,test/intermediate_algebra/1797.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,1.0,A
JiuZhang3.0-7B,test/intermediate_algebra/2146.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.7,B,0.5,B,0.4,B
JiuZhang3.0-7B,test/intermediate_algebra/2015.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.6,B,0.85,,,A
JiuZhang3.0-7B,test/intermediate_algebra/121.json,Intermediate Algebra,general,A,1.0,1.0,A,0.85,A,0.95,B,0.9,A
JiuZhang3.0-7B,test/intermediate_algebra/1014.json,Intermediate Algebra,general,Tie,0.5,0.5,A,1.0,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/intermediate_algebra/1462.json,Intermediate Algebra,general,B,1.0,0.5,B,0.85,B,0.7,B,0.5,B
JiuZhang3.0-7B,test/intermediate_algebra/199.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.95,A,1.0,B
JiuZhang3.0-7B,test/intermediate_algebra/1779.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.6,A,0.6,B,0.9,A
JiuZhang3.0-7B,test/intermediate_algebra/1102.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.6,A,0.9,A,0.9,A
JiuZhang3.0-7B,test/intermediate_algebra/834.json,Intermediate Algebra,general,B,1.0,0.5,B,0.85,B,0.85,B,0.8,B
JiuZhang3.0-7B,test/intermediate_algebra/158.json,Intermediate Algebra,general,A,0.0,1.0,B,0.65,B,0.85,A,0.7,B
JiuZhang3.0-7B,test/intermediate_algebra/752.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/intermediate_algebra/1279.json,Intermediate Algebra,general,Tie,0.5,0.5,A,1.0,A,0.3,A,0.6,B
JiuZhang3.0-7B,test/intermediate_algebra/1467.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.95,B,0.5,B,0.3,B
JiuZhang3.0-7B,test/intermediate_algebra/101.json,Intermediate Algebra,general,B,1.0,0.9,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/intermediate_algebra/1365.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.7,B,1.0,,,A
JiuZhang3.0-7B,test/intermediate_algebra/1350.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.85,B,0.85,B,0.7,A
JiuZhang3.0-7B,test/intermediate_algebra/1930.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.6,A,0.6,A,0.6,A
JiuZhang3.0-7B,test/intermediate_algebra/1981.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,A,0.7,A,0.8,A
JiuZhang3.0-7B,test/intermediate_algebra/1232.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.6,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/intermediate_algebra/1508.json,Intermediate Algebra,general,Tie,0.5,0.5,B,0.75,B,0.85,,,B
JiuZhang3.0-7B,test/algebra/2584.json,Algebra,general,Tie,0.5,0.5,B,0.6,B,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/1349.json,Algebra,general,A,1.0,1.0,A,0.51,A,0.95,B,0.9,B
JiuZhang3.0-7B,test/algebra/2036.json,Algebra,general,A,1.0,1.0,A,1.0,A,0.9,A,0.9,A
JiuZhang3.0-7B,test/algebra/1098.json,Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/algebra/1837.json,Algebra,general,Tie,0.5,0.5,A,0.6,B,0.7,B,0.9,B
JiuZhang3.0-7B,test/algebra/2193.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/algebra/2427.json,Algebra,general,Tie,0.5,0.5,B,0.9,B,0.7,A,1.0,B
JiuZhang3.0-7B,test/algebra/1072.json,Algebra,general,A,0.0,0.85,B,1.0,B,1.0,B,0.95,B
JiuZhang3.0-7B,test/algebra/24.json,Algebra,general,B,1.0,0.5,B,0.95,B,0.99,B,1.0,B
JiuZhang3.0-7B,test/algebra/2214.json,Algebra,general,Tie,0.5,0.5,B,0.95,B,0.98,B,1.0,B
JiuZhang3.0-7B,test/algebra/305.json,Algebra,general,B,1.0,1.0,B,0.8,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/algebra/1265.json,Algebra,general,B,1.0,1.0,B,0.9,B,0.6,B,0.3,B
JiuZhang3.0-7B,test/algebra/187.json,Algebra,general,Tie,0.5,0.5,A,0.6,B,0.5,B,0.8,A
JiuZhang3.0-7B,test/algebra/769.json,Algebra,general,Tie,0.5,0.5,B,0.9,A,0.95,B,0.95,B
JiuZhang3.0-7B,test/algebra/722.json,Algebra,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/2046.json,Algebra,general,Tie,0.5,0.5,B,0.85,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/algebra/2253.json,Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/1004.json,Algebra,general,B,0.0,1.0,A,0.55,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/algebra/1035.json,Algebra,general,Tie,0.5,0.5,B,0.51,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/2700.json,Algebra,general,Tie,0.5,0.5,A,0.6,A,0.9,A,0.9,B
JiuZhang3.0-7B,test/algebra/893.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,,,A
JiuZhang3.0-7B,test/algebra/567.json,Algebra,general,A,0.0,1.0,B,0.95,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/892.json,Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/algebra/2023.json,Algebra,general,Tie,0.5,0.5,B,0.75,B,0.9,B,0.7,B
JiuZhang3.0-7B,test/algebra/873.json,Algebra,general,B,0.0,1.0,A,0.9,B,0.9,A,0.8,B
JiuZhang3.0-7B,test/algebra/2058.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/algebra/2593.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.9,A
JiuZhang3.0-7B,test/algebra/2157.json,Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/algebra/2251.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/1332.json,Algebra,general,Tie,0.5,0.5,A,0.6,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/972.json,Algebra,general,B,1.0,1.0,B,1.0,B,0.8,B,0.7,A
JiuZhang3.0-7B,test/algebra/2232.json,Algebra,general,Tie,0.5,0.5,B,0.75,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/algebra/661.json,Algebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/algebra/246.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/algebra/1519.json,Algebra,general,Solution B,0.0,0.5,A,0.98,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/988.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/algebra/2570.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/algebra/621.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,A,0.9,B
JiuZhang3.0-7B,test/algebra/1255.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.98,B,1.0,A
JiuZhang3.0-7B,test/algebra/2517.json,Algebra,general,A,1.0,1.0,A,0.6,A,0.95,A,0.1,A
JiuZhang3.0-7B,test/algebra/478.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/algebra/297.json,Algebra,general,B,1.0,1.0,A,0.6,B,0.9,B,1.0,A
JiuZhang3.0-7B,test/algebra/841.json,Algebra,general,Tie,0.5,0.5,B,0.95,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/algebra/686.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.7,A,0.9,B
JiuZhang3.0-7B,test/algebra/351.json,Algebra,general,B,0.0,0.9,A,1.0,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/algebra/1275.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/algebra/1082.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.6,A
JiuZhang3.0-7B,test/algebra/1214.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.95,B,1.0,A
JiuZhang3.0-7B,test/algebra/2199.json,Algebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/algebra/733.json,Algebra,general,A,0.0,1.0,A,0.85,B,0.6,B,0.7,A
JiuZhang3.0-7B,test/algebra/109.json,Algebra,general,Tie,0.5,0.5,A,0.51,A,0.95,A,1.0,B
JiuZhang3.0-7B,test/algebra/1937.json,Algebra,general,A,0.0,1.0,B,0.8,B,0.9,B,0.7,B
JiuZhang3.0-7B,test/algebra/291.json,Algebra,general,Tie,0.5,0.5,A,0.65,B,0.8,B,0.8,B
JiuZhang3.0-7B,test/algebra/2102.json,Algebra,general,Tie,0.5,0.5,B,0.6,A,0.6,A,0.2,B
JiuZhang3.0-7B,test/algebra/907.json,Algebra,general,Tie,0.5,0.5,B,1.0,B,0.98,B,1.0,A
JiuZhang3.0-7B,test/algebra/864.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/2159.json,Algebra,general,Tie,0.5,0.5,B,0.7,B,0.95,B,0.7,B
JiuZhang3.0-7B,test/algebra/1578.json,Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,A
JiuZhang3.0-7B,test/algebra/975.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,0.9,A,0.6,A
JiuZhang3.0-7B,test/algebra/1143.json,Algebra,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/2626.json,Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.7,A
JiuZhang3.0-7B,test/algebra/1787.json,Algebra,general,B,1.0,1.0,B,0.7,B,0.7,A,0.7,A
JiuZhang3.0-7B,test/algebra/1934.json,Algebra,general,Tie,0.5,0.5,A,0.55,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/2064.json,Algebra,general,A,1.0,1.0,A,1.0,A,0.9,A,0.7,B
JiuZhang3.0-7B,test/algebra/694.json,Algebra,general,A,1.0,1.0,A,0.55,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/algebra/524.json,Algebra,general,Tie,0.5,0.5,A,0.9,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/algebra/2551.json,Algebra,general,B,1.0,1.0,B,0.55,B,0.95,B,1.0,A
JiuZhang3.0-7B,test/algebra/346.json,Algebra,general,Tie,0.5,0.5,B,0.55,A,0.95,B,0.95,B
JiuZhang3.0-7B,test/algebra/1282.json,Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/algebra/1184.json,Algebra,general,Tie,0.5,0.5,B,0.75,B,0.9,,,B
JiuZhang3.0-7B,test/algebra/634.json,Algebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/2486.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,0.95,A,0.9,A
JiuZhang3.0-7B,test/algebra/2257.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,0.95,A,1.0,B
JiuZhang3.0-7B,test/algebra/1842.json,Algebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/791.json,Algebra,general,Tie,0.5,0.5,A,0.65,A,0.7,A,0.6,B
JiuZhang3.0-7B,test/algebra/276.json,Algebra,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/algebra/2735.json,Algebra,general,B,1.0,1.0,B,0.6,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/algebra/425.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/1936.json,Algebra,general,B,1.0,1.0,B,0.55,A,0.95,B,0.95,B
JiuZhang3.0-7B,test/algebra/2176.json,Algebra,general,A,1.0,1.0,B,0.6,A,0.95,,,B
JiuZhang3.0-7B,test/algebra/509.json,Algebra,general,A,0.0,0.5,B,0.85,B,0.95,B,0.8,B
JiuZhang3.0-7B,test/algebra/1457.json,Algebra,general,A,1.0,0.5,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/algebra/2592.json,Algebra,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/858.json,Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,1.0,B
JiuZhang3.0-7B,test/algebra/1529.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/algebra/1338.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/1547.json,Algebra,general,B,0.0,1.0,B,0.8,A,0.95,A,0.9,A
JiuZhang3.0-7B,test/algebra/529.json,Algebra,general,B,1.0,0.5,B,0.85,B,0.95,B,0.9,B
JiuZhang3.0-7B,test/algebra/1078.json,Algebra,general,Tie,0.5,0.5,B,0.85,B,0.9,B,0.7,A
JiuZhang3.0-7B,test/algebra/251.json,Algebra,general,B,1.0,1.0,A,0.85,B,0.95,B,1.0,B
JiuZhang3.0-7B,test/algebra/1199.json,Algebra,general,A,1.0,1.0,A,0.55,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/algebra/2264.json,Algebra,general,Tie,0.5,0.5,B,0.85,B,0.98,B,0.95,B
JiuZhang3.0-7B,test/algebra/1303.json,Algebra,general,B,0.0,1.0,B,0.85,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/algebra/101.json,Algebra,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/170.json,Algebra,general,Tie,0.5,0.5,A,0.55,B,0.95,A,0.95,B
JiuZhang3.0-7B,test/algebra/849.json,Algebra,general,B,1.0,1.0,B,0.6,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/algebra/1031.json,Algebra,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/853.json,Algebra,general,Tie,0.5,0.5,B,0.65,B,0.9,,,B
JiuZhang3.0-7B,test/algebra/2277.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.8,A,0.95,A
JiuZhang3.0-7B,test/algebra/518.json,Algebra,general,Tie,0.5,0.5,B,1.0,B,0.98,B,1.0,A
JiuZhang3.0-7B,test/algebra/114.json,Algebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/1960.json,Algebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/algebra/2680.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.98,B,0.95,B
JiuZhang3.0-7B,test/algebra/2391.json,Algebra,general,Tie,0.5,0.5,B,0.85,B,0.85,B,0.8,B
JiuZhang3.0-7B,test/algebra/776.json,Algebra,general,Tie,0.5,0.5,B,0.5,B,0.9,B,0.7,A
JiuZhang3.0-7B,test/algebra/1796.json,Algebra,general,B,0.0,1.0,A,0.85,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/algebra/1339.json,Algebra,general,Tie,0.5,0.5,B,0.95,A,1.0,A,0.95,B
JiuZhang3.0-7B,test/algebra/2743.json,Algebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/algebra/2043.json,Algebra,general,A,0.0,1.0,B,1.0,B,0.98,B,1.0,A
JiuZhang3.0-7B,test/algebra/1553.json,Algebra,general,A,1.0,0.9,A,0.8,A,0.9,A,0.9,A
JiuZhang3.0-7B,test/algebra/2080.json,Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/algebra/1343.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/algebra/668.json,Algebra,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/algebra/2430.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.9,A,0.6,A
JiuZhang3.0-7B,test/algebra/2789.json,Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/algebra/1814.json,Algebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/algebra/2476.json,Algebra,general,Tie,0.5,0.5,A,0.6,B,0.9,A,0.7,B
JiuZhang3.0-7B,test/algebra/2780.json,Algebra,general,Tie,0.5,0.5,A,0.85,B,0.5,B,0.7,A
JiuZhang3.0-7B,test/algebra/824.json,Algebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/algebra/1425.json,Algebra,general,B,0.0,1.0,A,0.5,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/algebra/224.json,Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/algebra/435.json,Algebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/algebra/2470.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
JiuZhang3.0-7B,test/algebra/2779.json,Algebra,general,Tie,0.5,0.5,B,1.0,B,0.9,B,0.6,A
JiuZhang3.0-7B,test/number_theory/572.json,Number Theory,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/number_theory/515.json,Number Theory,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/number_theory/1032.json,Number Theory,general,A,0.0,1.0,B,0.6,B,0.9,B,0.7,B
JiuZhang3.0-7B,test/number_theory/737.json,Number Theory,general,A,0.0,0.85,B,0.85,B,0.9,B,0.9,B
JiuZhang3.0-7B,test/number_theory/864.json,Number Theory,general,Tie,0.5,0.5,A,0.95,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/number_theory/627.json,Number Theory,general,A,1.0,1.0,A,0.9,A,0.85,A,0.7,B
JiuZhang3.0-7B,test/number_theory/45.json,Number Theory,general,B,1.0,0.5,B,0.95,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/number_theory/1055.json,Number Theory,general,Tie,0.5,0.5,B,0.85,B,0.85,B,0.6,B
JiuZhang3.0-7B,test/number_theory/46.json,Number Theory,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/number_theory/516.json,Number Theory,general,A,0.0,1.0,B,0.65,B,0.85,A,0.8,B
JiuZhang3.0-7B,test/number_theory/357.json,Number Theory,general,Tie,0.5,0.5,A,0.85,A,0.85,B,0.7,A
JiuZhang3.0-7B,test/number_theory/914.json,Number Theory,general,B,1.0,1.0,A,0.8,B,0.9,B,0.6,B
JiuZhang3.0-7B,test/number_theory/847.json,Number Theory,general,B,0.0,1.0,A,0.75,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/number_theory/753.json,Number Theory,general,B,0.0,0.5,A,0.7,A,0.85,A,0.8,A
JiuZhang3.0-7B,test/number_theory/1257.json,Number Theory,general,Tie,0.5,0.5,B,0.6,B,1.0,,,B
JiuZhang3.0-7B,test/number_theory/156.json,Number Theory,general,Tie,0.5,0.5,A,0.9,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/number_theory/612.json,Number Theory,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/number_theory/931.json,Number Theory,general,Tie,0.5,0.5,B,0.85,B,0.85,B,0.6,B
JiuZhang3.0-7B,test/number_theory/521.json,Number Theory,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/number_theory/598.json,Number Theory,general,Tie,0.5,0.5,B,0.95,B,0.98,B,1.0,A
JiuZhang3.0-7B,test/number_theory/978.json,Number Theory,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.8,A
JiuZhang3.0-7B,test/number_theory/838.json,Number Theory,general,Tie,0.5,0.5,B,0.75,B,0.5,B,0.7,A
JiuZhang3.0-7B,test/number_theory/149.json,Number Theory,general,A,0.0,1.0,B,0.65,B,0.85,A,0.7,A
JiuZhang3.0-7B,test/number_theory/1201.json,Number Theory,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/number_theory/234.json,Number Theory,general,B,0.0,1.0,A,0.55,B,0.9,A,0.6,A
JiuZhang3.0-7B,test/number_theory/417.json,Number Theory,general,Tie,0.5,0.5,B,0.6,B,0.6,B,0.2,B
JiuZhang3.0-7B,test/number_theory/89.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.9,A,0.6,A
JiuZhang3.0-7B,test/number_theory/183.json,Number Theory,general,Tie,0.5,0.5,B,0.75,B,0.9,A,0.6,A
JiuZhang3.0-7B,test/number_theory/1065.json,Number Theory,general,A,0.0,1.0,B,1.0,B,1.0,B,0.9,A
JiuZhang3.0-7B,test/number_theory/466.json,Number Theory,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/number_theory/634.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/number_theory/533.json,Number Theory,general,Tie,0.5,0.5,B,0.8,B,0.95,B,0.7,A
JiuZhang3.0-7B,test/number_theory/691.json,Number Theory,general,Tie,0.5,0.5,B,0.55,A,0.95,B,1.0,A
JiuZhang3.0-7B,test/number_theory/1287.json,Number Theory,general,Tie,0.5,0.5,B,1.0,B,1.0,B,0.95,A
JiuZhang3.0-7B,test/number_theory/631.json,Number Theory,general,Tie,0.5,0.5,B,0.8,B,0.85,,,B
JiuZhang3.0-7B,test/number_theory/488.json,Number Theory,general,Tie,0.5,0.5,A,0.8,A,0.7,A,0.7,B
JiuZhang3.0-7B,test/number_theory/1172.json,Number Theory,general,Tie,0.5,0.5,B,0.95,B,0.98,B,0.9,B
JiuZhang3.0-7B,test/number_theory/203.json,Number Theory,general,Tie,0.5,0.5,B,0.6,A,0.5,A,0.7,A
JiuZhang3.0-7B,test/number_theory/911.json,Number Theory,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.8,B
JiuZhang3.0-7B,test/number_theory/483.json,Number Theory,general,Tie,0.5,0.5,A,1.0,A,1.0,A,0.9,A
JiuZhang3.0-7B,test/number_theory/368.json,Number Theory,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,B
JiuZhang3.0-7B,test/number_theory/686.json,Number Theory,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/number_theory/820.json,Number Theory,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/number_theory/109.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.9,B,0.9,A
JiuZhang3.0-7B,test/number_theory/427.json,Number Theory,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,B
JiuZhang3.0-7B,test/number_theory/1185.json,Number Theory,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.9,A
JiuZhang3.0-7B,test/number_theory/928.json,Number Theory,general,Tie,0.5,0.5,B,0.85,A,0.9,,,B
JiuZhang3.0-7B,test/number_theory/132.json,Number Theory,general,B,0.0,1.0,A,0.55,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/number_theory/769.json,Number Theory,general,Tie,0.5,0.5,B,0.75,B,0.9,B,0.8,B
JiuZhang3.0-7B,test/number_theory/1002.json,Number Theory,general,Tie,0.5,0.5,B,0.65,B,0.85,B,0.65,B
JiuZhang3.0-7B,test/number_theory/410.json,Number Theory,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/number_theory/255.json,Number Theory,general,B,1.0,1.0,B,0.65,A,0.9,B,0.9,A
JiuZhang3.0-7B,test/number_theory/1000.json,Number Theory,general,Tie,0.5,0.5,A,0.85,A,0.95,A,0.7,B
JiuZhang3.0-7B,test/number_theory/13.json,Number Theory,general,Tie,0.5,0.5,B,0.6,B,0.7,B,0.7,B
JiuZhang3.0-7B,test/number_theory/459.json,Number Theory,general,Tie,0.5,0.5,A,0.95,A,0.9,A,0.9,B
JiuZhang3.0-7B,test/number_theory/342.json,Number Theory,general,Tie,0.5,0.5,B,0.75,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/number_theory/679.json,Number Theory,general,A,1.0,0.9,A,0.95,A,0.95,A,0.8,A
JiuZhang3.0-7B,test/number_theory/72.json,Number Theory,general,Tie,0.5,0.5,A,0.9,A,0.9,A,0.8,A
JiuZhang3.0-7B,test/number_theory/22.json,Number Theory,general,Tie,0.5,0.5,A,1.0,A,0.99,A,1.0,A
JiuZhang3.0-7B,test/number_theory/1128.json,Number Theory,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/number_theory/1090.json,Number Theory,general,Tie,0.5,0.5,A,0.65,B,0.85,B,0.7,A
JiuZhang3.0-7B,test/number_theory/239.json,Number Theory,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/1622.json,Prealgebra,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/prealgebra/1139.json,Prealgebra,general,Tie,0.5,0.5,B,0.7,B,0.95,,,A
JiuZhang3.0-7B,test/prealgebra/1840.json,Prealgebra,general,Tie,0.5,0.5,B,0.6,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/1302.json,Prealgebra,general,B,0.0,1.0,A,0.85,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/prealgebra/930.json,Prealgebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/prealgebra/1558.json,Prealgebra,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/prealgebra/1388.json,Prealgebra,general,Tie,0.5,0.5,A,0.6,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/951.json,Prealgebra,general,Tie,0.5,0.5,A,0.95,A,0.98,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/572.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/1247.json,Prealgebra,general,B,0.0,1.0,A,0.6,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/prealgebra/1747.json,Prealgebra,general,B,0.0,1.0,A,0.95,A,0.85,B,0.85,A
JiuZhang3.0-7B,test/prealgebra/1233.json,Prealgebra,general,Tie,0.5,0.5,A,0.6,A,0.95,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/192.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/307.json,Prealgebra,general,B,1.0,1.0,B,0.85,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/prealgebra/1761.json,Prealgebra,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/1646.json,Prealgebra,general,Tie,0.5,0.5,A,0.65,A,0.85,A,0.7,B
JiuZhang3.0-7B,test/prealgebra/105.json,Prealgebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/1924.json,Prealgebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,1.0,A
JiuZhang3.0-7B,test/prealgebra/1804.json,Prealgebra,general,B,1.0,1.0,B,0.6,B,0.85,B,0.7,B
JiuZhang3.0-7B,test/prealgebra/1733.json,Prealgebra,general,Tie,0.5,0.5,B,0.1,A,0.5,A,0.3,A
JiuZhang3.0-7B,test/prealgebra/505.json,Prealgebra,general,Tie,0.5,0.5,A,0.95,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/prealgebra/1686.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,0.95,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/1807.json,Prealgebra,general,A,1.0,1.0,A,0.51,A,0.9,A,0.6,B
JiuZhang3.0-7B,test/prealgebra/1297.json,Prealgebra,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/prealgebra/1655.json,Prealgebra,general,A,0.0,1.0,B,1.0,B,0.95,B,1.0,A
JiuZhang3.0-7B,test/prealgebra/1356.json,Prealgebra,general,A,0.0,1.0,B,0.6,B,0.95,B,1.0,A
JiuZhang3.0-7B,test/prealgebra/1003.json,Prealgebra,general,A,1.0,1.0,A,0.75,B,0.5,A,0.6,B
JiuZhang3.0-7B,test/prealgebra/1272.json,Prealgebra,general,Tie,0.5,0.5,A,0.8,A,0.95,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/1113.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/prealgebra/1908.json,Prealgebra,general,A,1.0,0.9,A,0.9,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/1922.json,Prealgebra,general,Tie,0.5,0.5,B,0.95,B,0.98,B,0.95,A
JiuZhang3.0-7B,test/prealgebra/1907.json,Prealgebra,general,Tie,0.5,0.5,A,0.55,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/prealgebra/2086.json,Prealgebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/378.json,Prealgebra,general,A,0.0,1.0,B,0.75,B,0.85,A,0.7,B
JiuZhang3.0-7B,test/prealgebra/1555.json,Prealgebra,general,B,0.0,0.9,A,0.51,A,1.0,A,0.95,B
JiuZhang3.0-7B,test/prealgebra/1436.json,Prealgebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/prealgebra/1961.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/2057.json,Prealgebra,general,B,0.0,1.0,A,0.75,A,0.95,B,0.95,B
JiuZhang3.0-7B,test/prealgebra/153.json,Prealgebra,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/prealgebra/874.json,Prealgebra,general,Tie,0.5,0.5,B,0.65,B,0.9,,,A
JiuZhang3.0-7B,test/prealgebra/1251.json,Prealgebra,general,B,0.0,1.0,A,0.7,B,0.9,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/1458.json,Prealgebra,general,Tie,0.5,0.5,A,0.7,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/prealgebra/1995.json,Prealgebra,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/prealgebra/1317.json,Prealgebra,general,Tie,0.5,0.5,A,0.85,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/1742.json,Prealgebra,general,Tie,0.5,0.5,B,0.9,A,0.95,B,1.0,A
JiuZhang3.0-7B,test/prealgebra/993.json,Prealgebra,general,Tie,0.5,0.5,B,0.75,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/prealgebra/1834.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,0.9,A,0.8,A
JiuZhang3.0-7B,test/prealgebra/1512.json,Prealgebra,general,A,0.0,1.0,A,0.6,B,0.95,B,0.9,A
JiuZhang3.0-7B,test/prealgebra/260.json,Prealgebra,general,B,0.0,1.0,A,0.65,A,0.6,A,0.7,B
JiuZhang3.0-7B,test/prealgebra/1787.json,Prealgebra,general,Tie,0.5,0.5,B,0.85,B,0.98,A,0.95,B
JiuZhang3.0-7B,test/prealgebra/1044.json,Prealgebra,general,A,1.0,1.0,A,0.6,B,0.6,A,0.6,B
JiuZhang3.0-7B,test/prealgebra/465.json,Prealgebra,general,A,1.0,1.0,B,0.55,A,0.7,A,0.7,B
JiuZhang3.0-7B,test/prealgebra/1423.json,Prealgebra,general,B,1.0,1.0,B,0.95,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/prealgebra/954.json,Prealgebra,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/1973.json,Prealgebra,general,A,1.0,1.0,A,0.55,A,0.95,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/1730.json,Prealgebra,general,B,1.0,1.0,A,0.5,B,0.5,B,0.5,B
JiuZhang3.0-7B,test/prealgebra/1238.json,Prealgebra,general,Tie,0.5,0.5,B,1.0,B,0.98,B,1.0,A
JiuZhang3.0-7B,test/prealgebra/1353.json,Prealgebra,general,B,0.0,0.5,A,1.0,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/1187.json,Prealgebra,general,Tie,0.5,0.5,A,0.51,A,0.95,B,1.0,B
JiuZhang3.0-7B,test/prealgebra/1743.json,Prealgebra,general,B,1.0,1.0,B,0.85,B,0.95,B,0.95,A
JiuZhang3.0-7B,test/prealgebra/1865.json,Prealgebra,general,B,0.0,0.5,A,1.0,A,1.0,A,0.95,A
JiuZhang3.0-7B,test/prealgebra/1298.json,Prealgebra,general,Tie,0.5,0.5,A,0.85,A,0.95,B,1.0,A
JiuZhang3.0-7B,test/prealgebra/2066.json,Prealgebra,general,B,1.0,1.0,B,1.0,B,0.95,B,1.0,B
JiuZhang3.0-7B,test/prealgebra/631.json,Prealgebra,general,A,1.0,0.9,A,0.8,A,0.85,A,0.7,B
JiuZhang3.0-7B,test/prealgebra/977.json,Prealgebra,general,B,1.0,1.0,B,0.95,B,0.95,B,1.0,B
JiuZhang3.0-7B,test/prealgebra/1991.json,Prealgebra,general,B,0.0,0.9,B,0.85,A,0.9,,,A
JiuZhang3.0-7B,test/prealgebra/1784.json,Prealgebra,general,Tie,0.5,0.5,B,0.85,B,0.99,A,0.95,B
JiuZhang3.0-7B,test/prealgebra/1572.json,Prealgebra,general,A,0.0,1.0,B,1.0,B,0.95,B,1.0,B
JiuZhang3.0-7B,test/prealgebra/65.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/1227.json,Prealgebra,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/prealgebra/2019.json,Prealgebra,general,Tie,0.5,0.5,A,1.0,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/1640.json,Prealgebra,general,B,0.0,1.0,A,0.6,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/prealgebra/2037.json,Prealgebra,general,Tie,0.5,0.5,B,0.51,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/prealgebra/996.json,Prealgebra,general,B,0.0,0.5,A,1.0,A,0.98,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/805.json,Prealgebra,general,Tie,0.5,0.5,B,0.6,B,0.75,A,0.6,B
JiuZhang3.0-7B,test/prealgebra/914.json,Prealgebra,general,B,1.0,1.0,B,0.65,B,1.0,B,0.7,B
JiuZhang3.0-7B,test/prealgebra/1114.json,Prealgebra,general,B,1.0,1.0,B,0.95,B,0.95,B,0.9,B
JiuZhang3.0-7B,test/prealgebra/846.json,Prealgebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/prealgebra/1930.json,Prealgebra,general,Tie,0.5,0.5,A,0.6,B,0.5,,,B
JiuZhang3.0-7B,test/prealgebra/1252.json,Prealgebra,general,B,0.0,0.9,A,0.85,A,0.95,B,0.95,B
JiuZhang3.0-7B,test/prealgebra/1203.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/prealgebra/1128.json,Prealgebra,general,A,0.0,1.0,B,0.85,B,0.9,B,0.7,A
JiuZhang3.0-7B,test/geometry/248.json,Geometry,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/geometry/434.json,Geometry,general,Tie,0.5,0.5,A,0.7,A,0.9,A,0.8,A
JiuZhang3.0-7B,test/geometry/967.json,Geometry,general,Tie,0.5,0.5,A,1.0,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/geometry/627.json,Geometry,general,Tie,0.5,0.5,B,0.65,A,0.85,B,0.7,A
JiuZhang3.0-7B,test/geometry/178.json,Geometry,general,Tie,0.5,0.5,B,0.9,A,0.95,B,0.95,B
JiuZhang3.0-7B,test/geometry/456.json,Geometry,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/geometry/353.json,Geometry,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/geometry/183.json,Geometry,general,A,0.0,0.5,B,0.6,B,0.5,B,0.7,A
JiuZhang3.0-7B,test/geometry/283.json,Geometry,general,Tie,0.5,0.5,A,0.7,B,0.5,A,0.7,A
JiuZhang3.0-7B,test/geometry/1140.json,Geometry,general,Tie,0.5,0.5,B,0.65,B,0.7,B,0.65,B
JiuZhang3.0-7B,test/geometry/172.json,Geometry,general,Tie,0.5,0.5,B,0.75,B,0.75,B,0.6,A
JiuZhang3.0-7B,test/geometry/880.json,Geometry,general,Tie,0.5,0.5,B,0.6,B,0.5,,,B
JiuZhang3.0-7B,test/geometry/802.json,Geometry,general,A,1.0,0.9,A,1.0,A,0.95,A,0.9,A
JiuZhang3.0-7B,test/geometry/65.json,Geometry,general,B,0.0,1.0,A,0.75,A,0.95,A,0.9,A
JiuZhang3.0-7B,test/geometry/702.json,Geometry,general,Tie,0.5,0.5,A,0.4,A,0.5,,,A
JiuZhang3.0-7B,test/geometry/221.json,Geometry,general,Tie,0.5,0.5,A,0.85,B,0.95,B,0.9,B
JiuZhang3.0-7B,test/geometry/547.json,Geometry,general,B,1.0,1.0,B,0.85,B,0.85,B,0.8,A
JiuZhang3.0-7B,test/geometry/229.json,Geometry,general,Tie,0.5,0.5,A,0.65,A,0.9,A,0.7,A
JiuZhang3.0-7B,test/geometry/254.json,Geometry,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/geometry/473.json,Geometry,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/geometry/347.json,Geometry,general,Tie,0.5,0.5,B,0.95,B,1.0,B,0.95,B
JiuZhang3.0-7B,test/geometry/483.json,Geometry,general,B,0.0,1.0,A,0.95,A,0.95,B,0.9,A
JiuZhang3.0-7B,test/geometry/826.json,Geometry,general,Tie,0.5,0.5,B,0.85,B,0.9,,,B
JiuZhang3.0-7B,test/geometry/226.json,Geometry,general,Tie,0.5,0.5,A,0.65,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/geometry/686.json,Geometry,general,A,1.0,1.0,A,0.5,A,0.5,A,0.6,A
JiuZhang3.0-7B,test/geometry/1097.json,Geometry,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/geometry/965.json,Geometry,general,Tie,0.5,0.5,A,0.75,A,0.5,A,1.0,B
JiuZhang3.0-7B,test/geometry/711.json,Geometry,general,Tie,0.5,0.5,B,0.65,B,0.85,A,0.7,A
JiuZhang3.0-7B,test/geometry/1108.json,Geometry,general,B,0.0,1.0,A,0.7,A,0.9,A,0.7,B
JiuZhang3.0-7B,test/geometry/947.json,Geometry,general,B,1.0,1.0,B,0.85,B,0.85,B,0.8,B
JiuZhang3.0-7B,test/geometry/465.json,Geometry,general,Tie,0.5,0.5,A,0.7,B,0.5,A,0.3,A
JiuZhang3.0-7B,test/geometry/73.json,Geometry,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/geometry/106.json,Geometry,general,A,1.0,0.9,A,0.9,A,0.95,A,0.95,B
JiuZhang3.0-7B,test/geometry/846.json,Geometry,general,A,1.0,0.5,B,0.65,A,0.6,A,0.6,A
JiuZhang3.0-7B,test/geometry/538.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,A
JiuZhang3.0-7B,test/geometry/795.json,Geometry,general,B,0.0,1.0,A,0.6,A,0.5,A,0.6,B
JiuZhang3.0-7B,test/geometry/817.json,Geometry,general,Tie,0.5,0.5,B,0.6,B,0.85,,,B
JiuZhang3.0-7B,test/geometry/843.json,Geometry,general,Tie,0.5,0.5,B,0.85,A,0.95,B,0.95,A
JiuZhang3.0-7B,test/geometry/477.json,Geometry,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/geometry/561.json,Geometry,general,A,0.0,1.0,B,0.6,B,0.5,A,0.7,A
JiuZhang3.0-7B,test/geometry/615.json,Geometry,general,Tie,0.5,0.5,B,0.95,B,0.95,B,0.8,A
JiuZhang3.0-7B,test/counting_and_probability/525.json,Counting & Probability,general,Tie,0.5,0.5,A,0.7,A,0.7,,,B
JiuZhang3.0-7B,test/counting_and_probability/666.json,Counting & Probability,general,Tie,0.5,0.5,A,0.95,A,0.95,A,1.0,A
JiuZhang3.0-7B,test/counting_and_probability/134.json,Counting & Probability,general,Tie,0.5,0.5,B,0.95,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/counting_and_probability/119.json,Counting & Probability,general,Tie,0.5,0.5,B,0.75,B,0.7,A,0.6,B
JiuZhang3.0-7B,test/counting_and_probability/1114.json,Counting & Probability,general,A,0.0,1.0,B,1.0,B,0.95,B,1.0,A
JiuZhang3.0-7B,test/counting_and_probability/377.json,Counting & Probability,general,Tie,0.5,0.5,B,0.95,B,0.95,B,1.0,B
JiuZhang3.0-7B,test/counting_and_probability/23957.json,Counting & Probability,general,B,1.0,1.0,B,0.75,A,1.0,B,0.8,B
JiuZhang3.0-7B,test/counting_and_probability/1060.json,Counting & Probability,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/counting_and_probability/430.json,Counting & Probability,general,B,1.0,1.0,B,0.75,B,0.9,B,0.8,A
JiuZhang3.0-7B,test/counting_and_probability/159.json,Counting & Probability,general,Tie,0.5,0.5,A,0.8,A,0.5,A,0.8,A
JiuZhang3.0-7B,test/counting_and_probability/230.json,Counting & Probability,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/counting_and_probability/803.json,Counting & Probability,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/counting_and_probability/181.json,Counting & Probability,general,Tie,0.5,0.5,A,0.65,B,0.85,B,0.7,A
JiuZhang3.0-7B,test/counting_and_probability/51.json,Counting & Probability,general,Tie,0.5,0.5,A,0.8,B,0.95,B,0.8,A
JiuZhang3.0-7B,test/counting_and_probability/508.json,Counting & Probability,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/counting_and_probability/389.json,Counting & Probability,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,B
JiuZhang3.0-7B,test/counting_and_probability/765.json,Counting & Probability,general,Tie,0.5,0.5,B,0.75,A,0.85,A,0.7,B
JiuZhang3.0-7B,test/counting_and_probability/282.json,Counting & Probability,general,A,1.0,0.5,A,1.0,A,0.6,,,B
JiuZhang3.0-7B,test/counting_and_probability/71.json,Counting & Probability,general,Tie,0.5,0.5,A,0.8,A,0.5,A,0.9,B
JiuZhang3.0-7B,test/counting_and_probability/894.json,Counting & Probability,general,A,0.0,1.0,B,0.75,B,0.7,A,0.7,B
JiuZhang3.0-7B,test/counting_and_probability/1009.json,Counting & Probability,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/counting_and_probability/913.json,Counting & Probability,general,Tie,0.5,0.5,B,0.8,B,0.95,B,0.95,A
JiuZhang3.0-7B,test/counting_and_probability/25149.json,Counting & Probability,general,Tie,0.5,0.5,B,1.0,B,1.0,B,1.0,B
JiuZhang3.0-7B,test/counting_and_probability/339.json,Counting & Probability,general,Tie,0.5,0.5,B,0.55,A,0.95,B,0.9,B
JiuZhang3.0-7B,test/counting_and_probability/870.json,Counting & Probability,general,A,0.0,0.9,B,0.65,B,0.9,B,0.7,B
JiuZhang3.0-7B,test/counting_and_probability/216.json,Counting & Probability,general,A,0.0,1.0,B,0.9,B,1.0,B,0.8,A
JiuZhang3.0-7B,test/counting_and_probability/737.json,Counting & Probability,general,A,0.0,1.0,B,0.95,A,0.95,B,1.0,A
JiuZhang3.0-7B,test/counting_and_probability/116.json,Counting & Probability,general,Tie,0.5,0.5,B,0.55,B,0.9,A,0.9,B
JiuZhang3.0-7B,test/counting_and_probability/238.json,Counting & Probability,general,Tie,0.5,0.5,A,1.0,A,1.0,A,1.0,A
JiuZhang3.0-7B,test/counting_and_probability/1014.json,Counting & Probability,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,A
JiuZhang3.0-7B,test/counting_and_probability/14.json,Counting & Probability,general,Tie,0.5,0.5,A,0.6,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/counting_and_probability/188.json,Counting & Probability,general,A,1.0,1.0,A,1.0,A,0.9,A,0.6,B
JiuZhang3.0-7B,test/counting_and_probability/761.json,Counting & Probability,general,Tie,0.5,0.5,B,0.55,A,0.95,A,0.95,A
JiuZhang3.0-7B,test/counting_and_probability/10.json,Counting & Probability,general,Tie,0.5,0.5,A,0.95,A,0.6,A,0.7,A
JiuZhang3.0-7B,test/counting_and_probability/731.json,Counting & Probability,general,B,1.0,0.9,B,0.1,B,0.9,A,0.7,B
JiuZhang3.0-7B,test/counting_and_probability/190.json,Counting & Probability,general,B,1.0,1.0,B,1.0,B,0.95,B,0.95,B
JiuZhang3.0-7B,test/counting_and_probability/1003.json,Counting & Probability,general,Tie,0.5,0.5,A,0.65,A,0.9,A,0.7,A
JiuZhang3.0-7B,test/counting_and_probability/199.json,Counting & Probability,general,A,0.0,0.9,B,0.95,B,0.95,B,0.7,B
Ministral-8B-Instruct-2410,test/precalculus/807.json,Precalculus,general,A,1.0,0.9,B,0.9,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/precalculus/927.json,Precalculus,general,B,1.0,0.95,B,0.85,B,0.7,,,A
Ministral-8B-Instruct-2410,test/precalculus/1303.json,Precalculus,general,B,1.0,0.85,B,0.7,A,0.6,,,B
Ministral-8B-Instruct-2410,test/precalculus/990.json,Precalculus,general,A,1.0,1.0,B,0.7,A,0.85,,,B
Ministral-8B-Instruct-2410,test/precalculus/1199.json,Precalculus,general,A,0.0,0.85,B,0.8,B,0.85,,,B
Ministral-8B-Instruct-2410,test/precalculus/779.json,Precalculus,general,B,0.0,0.9,B,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/precalculus/285.json,Precalculus,general,A,1.0,0.9,A,0.75,B,0.5,A,0.7,A
Ministral-8B-Instruct-2410,test/precalculus/1105.json,Precalculus,general,A,0.0,0.9,B,1.0,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/precalculus/675.json,Precalculus,general,B,0.0,0.9,B,0.75,A,0.9,,,A
Ministral-8B-Instruct-2410,test/precalculus/1146.json,Precalculus,general,B,1.0,0.9,B,0.7,B,0.85,B,0.6,A
Ministral-8B-Instruct-2410,test/precalculus/1313.json,Precalculus,general,B,1.0,0.95,B,0.98,B,0.98,B,1.0,A
Ministral-8B-Instruct-2410,test/precalculus/24313.json,Precalculus,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/precalculus/34.json,Precalculus,general,B,0.0,0.95,A,1.0,A,1.0,A,1.0,A
Ministral-8B-Instruct-2410,test/precalculus/1300.json,Precalculus,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,B
Ministral-8B-Instruct-2410,test/precalculus/44.json,Precalculus,general,B,1.0,1.0,B,0.85,B,0.85,B,0.7,B
Ministral-8B-Instruct-2410,test/precalculus/477.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/precalculus/43.json,Precalculus,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/precalculus/986.json,Precalculus,general,B,1.0,1.0,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/precalculus/117.json,Precalculus,general,A,0.0,0.95,B,0.7,B,0.85,B,0.7,B
Ministral-8B-Instruct-2410,test/precalculus/697.json,Precalculus,general,A,0.0,1.0,B,0.7,B,0.85,A,0.8,B
Ministral-8B-Instruct-2410,test/precalculus/659.json,Precalculus,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Ministral-8B-Instruct-2410,test/precalculus/263.json,Precalculus,general,A,1.0,1.0,A,1.0,A,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/precalculus/541.json,Precalculus,general,A,0.0,0.9,B,0.9,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/precalculus/190.json,Precalculus,general,A,0.0,0.9,A,0.75,B,0.85,B,0.9,A
Ministral-8B-Instruct-2410,test/precalculus/819.json,Precalculus,general,A,0.0,0.9,B,1.0,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/precalculus/1056.json,Precalculus,general,B,1.0,0.9,A,0.75,B,0.7,B,0.8,A
Ministral-8B-Instruct-2410,test/precalculus/441.json,Precalculus,general,B,1.0,0.95,B,0.9,B,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/precalculus/989.json,Precalculus,general,A,0.0,1.0,B,0.65,B,0.85,A,0.8,A
Ministral-8B-Instruct-2410,test/precalculus/920.json,Precalculus,general,B,1.0,0.95,A,0.7,B,0.9,B,0.6,A
Ministral-8B-Instruct-2410,test/precalculus/452.json,Precalculus,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/precalculus/580.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.85,,,B
Ministral-8B-Instruct-2410,test/precalculus/768.json,Precalculus,general,B,0.0,0.95,A,0.75,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/precalculus/1172.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/precalculus/1201.json,Precalculus,general,B,0.0,0.95,A,0.7,B,0.6,A,0.8,A
Ministral-8B-Instruct-2410,test/precalculus/881.json,Precalculus,general,A,0.0,0.95,B,0.75,B,0.7,,,B
Ministral-8B-Instruct-2410,test/precalculus/695.json,Precalculus,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Ministral-8B-Instruct-2410,test/precalculus/742.json,Precalculus,general,A,1.0,0.9,A,0.85,B,0.75,,,A
Ministral-8B-Instruct-2410,test/precalculus/801.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,,,A
Ministral-8B-Instruct-2410,test/precalculus/826.json,Precalculus,general,B,1.0,0.9,A,0.65,B,0.7,B,0.65,B
Ministral-8B-Instruct-2410,test/precalculus/1281.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.7,,,A
Ministral-8B-Instruct-2410,test/precalculus/96.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.99,B,0.8,B
Ministral-8B-Instruct-2410,test/precalculus/1289.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/precalculus/902.json,Precalculus,general,B,0.0,0.7,A,0.65,A,0.85,,,A
Ministral-8B-Instruct-2410,test/precalculus/1291.json,Precalculus,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
Ministral-8B-Instruct-2410,test/precalculus/398.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,,,B
Ministral-8B-Instruct-2410,test/precalculus/681.json,Precalculus,general,B,0.0,0.95,B,0.6,A,0.6,A,0.8,A
Ministral-8B-Instruct-2410,test/precalculus/145.json,Precalculus,general,A,1.0,0.9,A,0.75,A,0.6,B,0.75,B
Ministral-8B-Instruct-2410,test/precalculus/625.json,Precalculus,general,A,0.0,0.95,B,0.75,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/precalculus/1202.json,Precalculus,general,A,0.0,1.0,B,0.65,B,0.85,A,0.8,A
Ministral-8B-Instruct-2410,test/precalculus/1133.json,Precalculus,general,A,0.0,0.95,A,0.65,B,0.7,,,B
Ministral-8B-Instruct-2410,test/precalculus/499.json,Precalculus,general,A,0.0,0.9,B,0.9,B,0.9,B,0.8,B
Ministral-8B-Instruct-2410,test/precalculus/323.json,Precalculus,general,B,1.0,0.95,A,0.75,B,0.95,,,A
Ministral-8B-Instruct-2410,test/precalculus/703.json,Precalculus,general,B,1.0,0.95,B,0.65,B,0.5,A,0.6,A
Ministral-8B-Instruct-2410,test/precalculus/1252.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/precalculus/1082.json,Precalculus,general,A,0.0,1.0,B,0.95,B,0.95,B,0.9,B
Ministral-8B-Instruct-2410,test/precalculus/356.json,Precalculus,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1994.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1197.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.75,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/134.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1000.json,Intermediate Algebra,general,B,0.0,1.0,A,0.9,A,0.85,A,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/607.json,Intermediate Algebra,general,A,1.0,0.95,A,0.75,A,0.85,A,0.6,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1388.json,Intermediate Algebra,general,B,0.0,0.95,B,0.75,A,0.85,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/428.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1454.json,Intermediate Algebra,general,B,0.0,0.95,B,0.6,A,0.6,A,0.7,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1217.json,Intermediate Algebra,general,B,0.0,0.95,A,1.0,A,0.98,A,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1168.json,Intermediate Algebra,general,A,1.0,1.0,A,1.0,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/956.json,Intermediate Algebra,general,A,0.0,0.95,B,0.7,B,0.95,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,0.0,1.0,B,1.0,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/279.json,Intermediate Algebra,general,A,1.0,0.85,A,0.95,A,0.85,A,0.6,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/207.json,Intermediate Algebra,general,A,1.0,0.9,A,0.85,A,0.9,A,0.8,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/623.json,Intermediate Algebra,general,A,0.0,1.0,B,0.7,B,0.85,A,0.7,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/47.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,0.95,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1849.json,Intermediate Algebra,general,A,0.0,0.9,A,0.75,B,0.9,B,0.9,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/2046.json,Intermediate Algebra,general,B,0.0,1.0,A,1.0,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/662.json,Intermediate Algebra,general,A,1.0,0.95,A,0.6,A,0.6,A,0.6,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/582.json,Intermediate Algebra,general,A,1.0,0.95,A,0.7,B,0.5,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/431.json,Intermediate Algebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/558.json,Intermediate Algebra,general,B,1.0,0.95,B,0.6,B,0.85,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/362.json,Intermediate Algebra,general,A,1.0,1.0,A,0.65,A,0.95,A,0.8,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/515.json,Intermediate Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/894.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.85,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/345.json,Intermediate Algebra,general,B,1.0,0.95,B,0.75,A,0.65,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1898.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/232.json,Intermediate Algebra,general,A,0.0,1.0,B,1.0,B,0.98,B,1.0,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/128.json,Intermediate Algebra,general,A,1.0,0.95,B,0.75,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1063.json,Intermediate Algebra,general,B,1.0,0.95,B,0.65,B,0.85,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1126.json,Intermediate Algebra,general,B,1.0,0.9,B,1.0,B,0.98,B,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/2022.json,Intermediate Algebra,general,B,1.0,1.0,B,1.0,B,0.98,B,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1151.json,Intermediate Algebra,general,B,0.0,1.0,A,0.95,A,0.85,A,0.9,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,0.0,1.0,B,0.65,B,0.75,A,0.7,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/966.json,Intermediate Algebra,general,B,1.0,0.95,B,0.75,B,0.95,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/964.json,Intermediate Algebra,general,A,0.0,0.95,B,0.6,A,0.7,B,0.65,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1410.json,Intermediate Algebra,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/991.json,Intermediate Algebra,general,B,1.0,0.9,B,0.6,B,0.85,A,0.8,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/183.json,Intermediate Algebra,general,B,0.0,0.9,A,0.65,A,0.7,A,0.7,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1422.json,Intermediate Algebra,general,A,1.0,0.8,A,0.98,A,1.0,A,1.0,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/2196.json,Intermediate Algebra,general,B,1.0,0.95,B,0.6,B,0.7,A,0.7,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/591.json,Intermediate Algebra,general,A,0.0,0.95,B,0.92,B,0.85,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1555.json,Intermediate Algebra,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1510.json,Intermediate Algebra,general,A,1.0,1.0,B,0.75,A,0.85,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/102.json,Intermediate Algebra,general,A,0.0,1.0,B,1.0,B,0.99,B,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/986.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.95,A,0.6,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1354.json,Intermediate Algebra,general,A,0.0,1.0,B,0.7,A,0.5,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1837.json,Intermediate Algebra,general,A,1.0,0.95,A,0.75,B,0.95,A,0.9,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/337.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1210.json,Intermediate Algebra,general,A,0.0,1.0,B,0.75,B,0.85,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1123.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/149.json,Intermediate Algebra,general,B,0.0,0.95,B,0.85,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1411.json,Intermediate Algebra,general,A,1.0,0.95,A,0.6,A,0.85,A,0.5,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/960.json,Intermediate Algebra,general,B,1.0,0.95,B,0.65,B,0.65,B,0.65,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1300.json,Intermediate Algebra,general,A,1.0,0.9,B,0.8,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/90.json,Intermediate Algebra,general,B,0.0,1.0,A,0.7,B,0.9,A,0.9,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/754.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.7,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/446.json,Intermediate Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1544.json,Intermediate Algebra,general,B,0.0,1.0,A,0.7,A,0.85,A,0.7,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1714.json,Intermediate Algebra,general,B,0.0,0.9,A,0.85,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/2152.json,Intermediate Algebra,general,B,1.0,0.95,B,0.7,B,0.5,A,0.7,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/117.json,Intermediate Algebra,general,B,0.0,0.85,A,0.75,B,0.7,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/190.json,Intermediate Algebra,general,B,1.0,0.9,B,0.65,B,0.85,A,0.6,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/776.json,Intermediate Algebra,general,B,1.0,0.95,B,0.75,B,0.95,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1566.json,Intermediate Algebra,general,B,1.0,0.9,B,1.0,B,0.98,B,1.0,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1572.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1166.json,Intermediate Algebra,general,B,1.0,0.95,B,0.7,B,0.65,A,0.7,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/860.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1407.json,Intermediate Algebra,general,A,0.0,0.95,B,1.0,B,0.98,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1405.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.9,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/690.json,Intermediate Algebra,general,B,0.0,0.9,B,0.8,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/955.json,Intermediate Algebra,general,A,1.0,0.9,B,0.6,A,0.7,A,0.4,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1992.json,Intermediate Algebra,general,B,1.0,0.95,B,0.75,B,0.95,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1111.json,Intermediate Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.9,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1791.json,Intermediate Algebra,general,B,0.0,1.0,B,0.85,A,0.85,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1806.json,Intermediate Algebra,general,A,1.0,0.9,A,0.85,B,0.85,A,0.65,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1797.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/2146.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.6,B,0.7,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/2015.json,Intermediate Algebra,general,B,1.0,0.95,A,0.6,B,0.75,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/121.json,Intermediate Algebra,general,B,0.0,0.9,B,0.8,A,0.7,A,0.7,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1014.json,Intermediate Algebra,general,A,0.0,1.0,B,0.75,B,0.85,B,1.0,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1462.json,Intermediate Algebra,general,A,1.0,1.0,B,0.6,A,0.7,A,0.6,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/199.json,Intermediate Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1779.json,Intermediate Algebra,general,A,1.0,0.95,A,0.75,A,0.65,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1102.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.8,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/834.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/158.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.75,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/752.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1279.json,Intermediate Algebra,general,B,0.0,0.9,A,0.85,A,0.6,A,0.8,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1467.json,Intermediate Algebra,general,A,1.0,0.95,A,0.6,B,0.5,,,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/101.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.98,B,0.95,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1365.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.85,B,0.8,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1350.json,Intermediate Algebra,general,A,0.0,1.0,B,0.75,B,0.85,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1930.json,Intermediate Algebra,general,B,1.0,1.0,B,0.65,B,0.9,,,A
Ministral-8B-Instruct-2410,test/intermediate_algebra/1981.json,Intermediate Algebra,general,A,1.0,0.9,A,0.6,A,0.7,B,0.7,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1232.json,Intermediate Algebra,general,A,1.0,0.9,A,0.9,A,1.0,A,1.0,B
Ministral-8B-Instruct-2410,test/intermediate_algebra/1508.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,A,0.7,,,B
Ministral-8B-Instruct-2410,test/algebra/2584.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/1349.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2036.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1098.json,Algebra,general,B,1.0,0.95,B,0.8,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1837.json,Algebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2193.json,Algebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/2427.json,Algebra,general,B,1.0,0.95,B,0.7,B,0.85,B,0.7,A
Ministral-8B-Instruct-2410,test/algebra/1072.json,Algebra,general,A,0.0,1.0,B,0.85,A,0.85,B,0.9,A
Ministral-8B-Instruct-2410,test/algebra/24.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/2214.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/305.json,Algebra,general,B,0.0,0.95,A,0.8,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1265.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/187.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.9,B
Ministral-8B-Instruct-2410,test/algebra/769.json,Algebra,general,A,0.0,0.9,B,0.95,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/722.json,Algebra,general,B,1.0,1.0,B,0.85,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2046.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2253.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/1004.json,Algebra,general,B,1.0,0.9,A,0.85,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/1035.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2700.json,Algebra,general,A,1.0,1.0,A,0.8,A,0.6,A,0.7,B
Ministral-8B-Instruct-2410,test/algebra/893.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/567.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/892.json,Algebra,general,B,0.0,0.9,A,0.95,A,0.9,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2023.json,Algebra,general,B,0.0,0.9,A,0.6,A,0.95,A,0.6,B
Ministral-8B-Instruct-2410,test/algebra/873.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/2058.json,Algebra,general,B,0.0,0.95,A,1.0,A,1.0,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2593.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2157.json,Algebra,general,A,1.0,0.95,A,0.8,B,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2251.json,Algebra,general,A,0.0,1.0,B,0.98,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/1332.json,Algebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/972.json,Algebra,general,B,1.0,0.95,B,0.85,A,0.98,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2232.json,Algebra,general,A,1.0,0.9,A,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/661.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.9,A
Ministral-8B-Instruct-2410,test/algebra/246.json,Algebra,general,A,1.0,1.0,B,0.9,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/1519.json,Algebra,general,B,0.0,0.9,B,0.9,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/988.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/2570.json,Algebra,general,B,0.0,0.9,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/621.json,Algebra,general,B,1.0,0.95,B,0.6,B,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/1255.json,Algebra,general,B,0.0,1.0,A,0.9,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2517.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/478.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/297.json,Algebra,general,B,1.0,0.95,B,0.7,B,0.9,B,0.8,A
Ministral-8B-Instruct-2410,test/algebra/841.json,Algebra,general,B,1.0,1.0,B,0.95,B,0.98,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/686.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/algebra/351.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/1275.json,Algebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/algebra/1082.json,Algebra,general,A,0.0,1.0,B,0.65,B,0.9,A,0.8,A
Ministral-8B-Instruct-2410,test/algebra/1214.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/2199.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/733.json,Algebra,general,A,0.0,1.0,B,0.85,A,0.9,B,0.8,A
Ministral-8B-Instruct-2410,test/algebra/109.json,Algebra,general,B,0.0,0.9,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1937.json,Algebra,general,B,0.0,0.95,A,0.85,B,0.95,A,0.8,B
Ministral-8B-Instruct-2410,test/algebra/291.json,Algebra,general,A,1.0,1.0,A,0.8,A,0.9,,,B
Ministral-8B-Instruct-2410,test/algebra/2102.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/907.json,Algebra,general,A,1.0,0.9,A,0.92,A,0.98,A,1.0,B
Ministral-8B-Instruct-2410,test/algebra/864.json,Algebra,general,B,0.0,0.95,B,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/2159.json,Algebra,general,A,1.0,0.95,A,0.85,B,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/algebra/1578.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/975.json,Algebra,general,B,1.0,0.95,B,0.6,B,0.6,A,0.9,B
Ministral-8B-Instruct-2410,test/algebra/1143.json,Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2626.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.9,B
Ministral-8B-Instruct-2410,test/algebra/1787.json,Algebra,general,B,0.0,0.95,A,1.0,A,1.0,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/1934.json,Algebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2064.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/694.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/524.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/2551.json,Algebra,general,A,0.0,0.9,B,0.9,A,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/346.json,Algebra,general,B,1.0,1.0,B,0.85,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1282.json,Algebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/1184.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/634.json,Algebra,general,B,0.0,0.95,A,0.51,A,0.98,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/2486.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.9,A,1.0,B
Ministral-8B-Instruct-2410,test/algebra/2257.json,Algebra,general,A,1.0,0.95,A,0.98,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1842.json,Algebra,general,B,1.0,0.9,B,0.9,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/791.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/276.json,Algebra,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2735.json,Algebra,general,A,0.0,0.9,B,0.75,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/425.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1936.json,Algebra,general,B,0.0,0.95,B,0.85,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/2176.json,Algebra,general,A,0.0,1.0,B,0.75,B,0.8,,,A
Ministral-8B-Instruct-2410,test/algebra/509.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.8,A
Ministral-8B-Instruct-2410,test/algebra/1457.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2592.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.98,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/858.json,Algebra,general,B,0.0,0.95,A,1.0,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/1529.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/1338.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1547.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/529.json,Algebra,general,B,1.0,0.95,B,0.95,A,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/1078.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/algebra/251.json,Algebra,general,B,1.0,0.9,B,0.98,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/1199.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2264.json,Algebra,general,A,0.0,1.0,B,0.9,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1303.json,Algebra,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/101.json,Algebra,general,B,1.0,1.0,B,0.8,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/170.json,Algebra,general,B,0.0,1.0,A,0.75,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/849.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/1031.json,Algebra,general,B,0.0,0.9,B,0.85,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/853.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.9,A
Ministral-8B-Instruct-2410,test/algebra/2277.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/518.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/114.json,Algebra,general,A,1.0,0.95,B,0.55,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/1960.json,Algebra,general,B,0.0,0.95,A,0.9,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/2680.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2391.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/776.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.85,,,A
Ministral-8B-Instruct-2410,test/algebra/1796.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.98,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/1339.json,Algebra,general,B,0.0,0.95,B,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/algebra/2743.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/2043.json,Algebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1553.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2080.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/algebra/1343.json,Algebra,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,B
Ministral-8B-Instruct-2410,test/algebra/668.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2430.json,Algebra,general,A,1.0,1.0,A,0.9,A,1.0,A,0.8,B
Ministral-8B-Instruct-2410,test/algebra/2789.json,Algebra,general,A,0.0,0.9,B,0.75,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/1814.json,Algebra,general,B,0.0,1.0,B,0.65,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2476.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2780.json,Algebra,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/algebra/824.json,Algebra,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,A
Ministral-8B-Instruct-2410,test/algebra/1425.json,Algebra,general,A,0.0,0.95,B,0.75,A,0.95,B,0.9,B
Ministral-8B-Instruct-2410,test/algebra/224.json,Algebra,general,B,1.0,0.9,B,0.95,A,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/algebra/435.json,Algebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2470.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/algebra/2779.json,Algebra,general,A,1.0,0.9,A,1.0,A,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/number_theory/572.json,Number Theory,general,A,1.0,0.95,A,0.6,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/515.json,Number Theory,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/number_theory/1032.json,Number Theory,general,B,1.0,0.9,B,0.92,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/737.json,Number Theory,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,B
Ministral-8B-Instruct-2410,test/number_theory/864.json,Number Theory,general,A,0.0,0.95,B,1.0,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/627.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/45.json,Number Theory,general,B,0.0,0.95,A,0.75,B,0.95,A,0.9,A
Ministral-8B-Instruct-2410,test/number_theory/1055.json,Number Theory,general,B,0.0,0.9,A,0.95,A,0.75,,,A
Ministral-8B-Instruct-2410,test/number_theory/46.json,Number Theory,general,B,0.0,0.9,B,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/516.json,Number Theory,general,B,1.0,0.8,B,0.6,B,0.99,,,B
Ministral-8B-Instruct-2410,test/number_theory/357.json,Number Theory,general,A,0.0,1.0,A,0.75,B,0.85,B,0.85,B
Ministral-8B-Instruct-2410,test/number_theory/914.json,Number Theory,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
Ministral-8B-Instruct-2410,test/number_theory/847.json,Number Theory,general,B,0.0,0.9,A,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/753.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,A
Ministral-8B-Instruct-2410,test/number_theory/1257.json,Number Theory,general,A,0.0,0.95,B,0.75,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/156.json,Number Theory,general,B,1.0,1.0,B,0.98,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/number_theory/612.json,Number Theory,general,A,1.0,0.95,A,0.8,A,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/number_theory/931.json,Number Theory,general,B,1.0,0.95,B,0.75,A,0.98,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/521.json,Number Theory,general,B,1.0,0.9,B,0.8,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/598.json,Number Theory,general,A,1.0,0.95,A,0.75,A,0.85,A,0.8,B
Ministral-8B-Instruct-2410,test/number_theory/978.json,Number Theory,general,A,0.0,1.0,B,0.65,B,0.9,,,B
Ministral-8B-Instruct-2410,test/number_theory/838.json,Number Theory,general,B,1.0,0.8,B,0.75,B,0.75,B,0.7,A
Ministral-8B-Instruct-2410,test/number_theory/149.json,Number Theory,general,B,0.0,0.9,A,0.95,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/1201.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/number_theory/234.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,A,0.8,B
Ministral-8B-Instruct-2410,test/number_theory/417.json,Number Theory,general,B,1.0,0.95,A,0.6,B,0.85,B,0.7,A
Ministral-8B-Instruct-2410,test/number_theory/89.json,Number Theory,general,A,0.0,0.9,B,0.85,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/number_theory/183.json,Number Theory,general,B,1.0,0.95,B,0.98,B,0.98,B,1.0,A
Ministral-8B-Instruct-2410,test/number_theory/1065.json,Number Theory,general,A,1.0,0.9,A,0.8,A,0.85,A,0.7,A
Ministral-8B-Instruct-2410,test/number_theory/466.json,Number Theory,general,A,1.0,0.9,A,0.85,A,0.95,A,0.9,B
Ministral-8B-Instruct-2410,test/number_theory/634.json,Number Theory,general,A,0.0,0.95,B,0.98,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/number_theory/533.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.85,B,0.9,B
Ministral-8B-Instruct-2410,test/number_theory/691.json,Number Theory,general,B,1.0,0.9,B,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/1287.json,Number Theory,general,A,1.0,0.9,B,0.55,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/631.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.5,B
Ministral-8B-Instruct-2410,test/number_theory/488.json,Number Theory,general,A,1.0,0.95,B,0.75,A,0.95,A,0.7,B
Ministral-8B-Instruct-2410,test/number_theory/1172.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/number_theory/203.json,Number Theory,general,B,0.0,0.95,A,0.8,A,0.85,A,0.7,B
Ministral-8B-Instruct-2410,test/number_theory/911.json,Number Theory,general,B,0.0,0.9,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/number_theory/483.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/368.json,Number Theory,general,B,0.0,0.9,A,0.9,A,0.95,A,0.8,A
Ministral-8B-Instruct-2410,test/number_theory/686.json,Number Theory,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/number_theory/820.json,Number Theory,general,A,1.0,0.95,B,0.7,A,0.5,A,0.7,B
Ministral-8B-Instruct-2410,test/number_theory/109.json,Number Theory,general,B,0.0,0.85,A,1.0,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/number_theory/427.json,Number Theory,general,A,1.0,0.95,A,0.75,A,0.95,A,0.9,B
Ministral-8B-Instruct-2410,test/number_theory/1185.json,Number Theory,general,B,0.0,0.9,A,0.9,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/number_theory/928.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.98,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/132.json,Number Theory,general,B,0.0,0.95,B,0.95,A,0.95,A,0.9,A
Ministral-8B-Instruct-2410,test/number_theory/769.json,Number Theory,general,A,0.0,0.95,A,0.85,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/number_theory/1002.json,Number Theory,general,A,0.0,1.0,B,0.85,B,0.95,B,0.9,B
Ministral-8B-Instruct-2410,test/number_theory/410.json,Number Theory,general,B,0.0,1.0,B,0.55,A,0.6,A,0.7,A
Ministral-8B-Instruct-2410,test/number_theory/255.json,Number Theory,general,B,0.0,0.9,A,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/1000.json,Number Theory,general,B,1.0,0.9,B,0.65,B,0.95,,,B
Ministral-8B-Instruct-2410,test/number_theory/13.json,Number Theory,general,A,0.0,1.0,B,0.85,B,0.9,B,0.8,B
Ministral-8B-Instruct-2410,test/number_theory/459.json,Number Theory,general,B,1.0,0.95,B,0.85,B,0.95,B,0.9,B
Ministral-8B-Instruct-2410,test/number_theory/342.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/number_theory/679.json,Number Theory,general,A,0.0,0.95,B,1.0,A,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/number_theory/72.json,Number Theory,general,A,0.0,0.9,B,0.95,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/number_theory/22.json,Number Theory,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/number_theory/1128.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/number_theory/1090.json,Number Theory,general,A,1.0,0.95,B,0.3,A,0.99,A,0.9,A
Ministral-8B-Instruct-2410,test/number_theory/239.json,Number Theory,general,B,0.0,0.9,A,1.0,A,1.0,A,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1622.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1139.json,Prealgebra,general,B,1.0,0.95,B,0.65,B,0.85,,,B
Ministral-8B-Instruct-2410,test/prealgebra/1840.json,Prealgebra,general,A,0.0,1.0,B,1.0,B,1.0,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1302.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/930.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1558.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.85,B
Ministral-8B-Instruct-2410,test/prealgebra/1388.json,Prealgebra,general,B,0.0,1.0,B,0.9,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/951.json,Prealgebra,general,B,1.0,0.95,B,0.9,A,0.95,B,0.9,A
Ministral-8B-Instruct-2410,test/prealgebra/572.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1247.json,Prealgebra,general,B,0.0,1.0,A,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1747.json,Prealgebra,general,B,0.0,0.95,B,0.85,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/1233.json,Prealgebra,general,B,1.0,0.9,A,0.9,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/192.json,Prealgebra,general,B,0.0,0.9,A,0.9,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/307.json,Prealgebra,general,B,1.0,0.95,B,0.85,A,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/1761.json,Prealgebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1646.json,Prealgebra,general,B,1.0,1.0,B,0.65,B,0.85,B,0.75,A
Ministral-8B-Instruct-2410,test/prealgebra/105.json,Prealgebra,general,B,1.0,0.85,B,0.85,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/1924.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/1804.json,Prealgebra,general,A,0.0,0.9,B,0.85,B,0.9,B,0.9,A
Ministral-8B-Instruct-2410,test/prealgebra/1733.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/505.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1686.json,Prealgebra,general,A,1.0,0.85,B,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/1807.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1297.json,Prealgebra,general,B,0.0,1.0,A,0.8,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1655.json,Prealgebra,general,B,0.0,1.0,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/1356.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/1003.json,Prealgebra,general,B,0.0,0.95,B,0.6,A,0.85,,,A
Ministral-8B-Instruct-2410,test/prealgebra/1272.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/1113.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.95,B,0.9,B
Ministral-8B-Instruct-2410,test/prealgebra/1908.json,Prealgebra,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/1922.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/1907.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/2086.json,Prealgebra,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/378.json,Prealgebra,general,A,0.0,1.0,B,0.8,B,0.85,,,A
Ministral-8B-Instruct-2410,test/prealgebra/1555.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/1436.json,Prealgebra,general,B,1.0,1.0,B,1.0,B,0.98,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1961.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/2057.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/153.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/874.json,Prealgebra,general,B,1.0,1.0,B,0.6,B,0.6,,,B
Ministral-8B-Instruct-2410,test/prealgebra/1251.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1458.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1995.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1317.json,Prealgebra,general,B,0.0,0.95,B,0.85,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/1742.json,Prealgebra,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/993.json,Prealgebra,general,B,0.0,0.95,A,0.9,A,0.98,A,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1834.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1512.json,Prealgebra,general,B,0.0,0.95,A,1.0,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/260.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.85,,,A
Ministral-8B-Instruct-2410,test/prealgebra/1787.json,Prealgebra,general,B,0.0,0.9,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/1044.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,,,B
Ministral-8B-Instruct-2410,test/prealgebra/465.json,Prealgebra,general,A,0.0,0.95,B,0.75,B,0.95,A,0.6,B
Ministral-8B-Instruct-2410,test/prealgebra/1423.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/954.json,Prealgebra,general,B,1.0,0.95,B,0.75,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1973.json,Prealgebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/1730.json,Prealgebra,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1238.json,Prealgebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1353.json,Prealgebra,general,A,1.0,0.9,B,0.8,A,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1187.json,Prealgebra,general,A,0.0,1.0,B,0.95,B,0.9,B,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/1743.json,Prealgebra,general,A,1.0,0.9,A,1.0,A,1.0,A,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1865.json,Prealgebra,general,A,0.0,1.0,A,0.9,B,0.98,B,0.9,A
Ministral-8B-Instruct-2410,test/prealgebra/1298.json,Prealgebra,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/2066.json,Prealgebra,general,B,1.0,0.9,B,0.9,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/631.json,Prealgebra,general,B,1.0,0.95,B,0.75,B,0.6,B,0.7,A
Ministral-8B-Instruct-2410,test/prealgebra/977.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/1991.json,Prealgebra,general,A,0.0,0.9,B,0.95,A,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1784.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1572.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/65.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1227.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/2019.json,Prealgebra,general,B,0.0,0.95,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/1640.json,Prealgebra,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,A
Ministral-8B-Instruct-2410,test/prealgebra/2037.json,Prealgebra,general,B,0.0,0.95,A,0.98,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/996.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/805.json,Prealgebra,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/914.json,Prealgebra,general,B,0.0,0.9,B,0.7,A,0.8,,,A
Ministral-8B-Instruct-2410,test/prealgebra/1114.json,Prealgebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/846.json,Prealgebra,general,B,0.0,1.0,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/prealgebra/1930.json,Prealgebra,general,B,0.0,0.95,B,0.75,A,0.85,,,B
Ministral-8B-Instruct-2410,test/prealgebra/1252.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/prealgebra/1203.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
Ministral-8B-Instruct-2410,test/prealgebra/1128.json,Prealgebra,general,A,1.0,0.9,A,0.75,A,0.95,A,0.8,A
Ministral-8B-Instruct-2410,test/geometry/248.json,Geometry,general,B,0.0,0.95,B,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/geometry/434.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.9,,,A
Ministral-8B-Instruct-2410,test/geometry/967.json,Geometry,general,B,1.0,0.7,B,1.0,B,1.0,B,1.0,B
Ministral-8B-Instruct-2410,test/geometry/627.json,Geometry,general,B,1.0,0.95,B,0.7,B,0.7,B,0.8,B
Ministral-8B-Instruct-2410,test/geometry/178.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/geometry/456.json,Geometry,general,B,0.0,0.9,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/geometry/353.json,Geometry,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/geometry/183.json,Geometry,general,A,1.0,0.95,A,0.75,A,0.6,,,B
Ministral-8B-Instruct-2410,test/geometry/283.json,Geometry,general,B,1.0,1.0,B,0.85,B,0.95,,,A
Ministral-8B-Instruct-2410,test/geometry/1140.json,Geometry,general,B,1.0,1.0,A,0.7,B,0.9,,,A
Ministral-8B-Instruct-2410,test/geometry/172.json,Geometry,general,B,1.0,0.9,B,0.95,B,0.95,,,B
Ministral-8B-Instruct-2410,test/geometry/880.json,Geometry,general,B,1.0,0.95,B,0.7,B,0.9,,,A
Ministral-8B-Instruct-2410,test/geometry/802.json,Geometry,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/geometry/65.json,Geometry,general,B,1.0,0.95,B,0.65,B,0.75,B,0.7,A
Ministral-8B-Instruct-2410,test/geometry/702.json,Geometry,general,B,1.0,0.85,B,0.6,B,0.85,,,B
Ministral-8B-Instruct-2410,test/geometry/221.json,Geometry,general,B,1.0,1.0,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/geometry/547.json,Geometry,general,B,1.0,1.0,B,0.65,B,0.85,,,B
Ministral-8B-Instruct-2410,test/geometry/229.json,Geometry,general,B,1.0,0.75,B,0.85,B,0.75,,,A
Ministral-8B-Instruct-2410,test/geometry/254.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/geometry/473.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/geometry/347.json,Geometry,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/geometry/483.json,Geometry,general,A,1.0,1.0,A,0.9,A,0.98,A,0.95,B
Ministral-8B-Instruct-2410,test/geometry/826.json,Geometry,general,A,1.0,1.0,A,0.6,B,0.6,,,A
Ministral-8B-Instruct-2410,test/geometry/226.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,0.8,B
Ministral-8B-Instruct-2410,test/geometry/686.json,Geometry,general,A,0.0,0.95,B,0.9,B,0.85,B,0.8,A
Ministral-8B-Instruct-2410,test/geometry/1097.json,Geometry,general,A,1.0,0.9,A,1.0,A,1.0,A,0.95,A
Ministral-8B-Instruct-2410,test/geometry/965.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.9,,,B
Ministral-8B-Instruct-2410,test/geometry/711.json,Geometry,general,A,1.0,0.9,B,0.7,A,0.7,,,B
Ministral-8B-Instruct-2410,test/geometry/1108.json,Geometry,general,B,1.0,1.0,B,1.0,B,1.0,B,1.0,A
Ministral-8B-Instruct-2410,test/geometry/947.json,Geometry,general,A,0.0,1.0,B,0.6,B,0.5,,,B
Ministral-8B-Instruct-2410,test/geometry/465.json,Geometry,general,A,0.0,0.95,B,0.7,B,0.9,B,0.9,A
Ministral-8B-Instruct-2410,test/geometry/73.json,Geometry,general,B,1.0,0.9,B,0.98,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/geometry/106.json,Geometry,general,B,1.0,0.95,B,0.95,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/geometry/846.json,Geometry,general,B,1.0,0.95,B,0.7,B,0.8,,,B
Ministral-8B-Instruct-2410,test/geometry/538.json,Geometry,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,A
Ministral-8B-Instruct-2410,test/geometry/795.json,Geometry,general,A,0.0,1.0,A,0.4,B,0.85,,,B
Ministral-8B-Instruct-2410,test/geometry/817.json,Geometry,general,B,1.0,0.95,B,0.65,B,0.7,,,A
Ministral-8B-Instruct-2410,test/geometry/843.json,Geometry,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/geometry/477.json,Geometry,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/geometry/561.json,Geometry,general,A,0.0,0.95,B,0.75,B,0.75,,,B
Ministral-8B-Instruct-2410,test/geometry/615.json,Geometry,general,B,0.0,0.8,B,0.85,A,0.5,A,0.6,A
Ministral-8B-Instruct-2410,test/counting_and_probability/525.json,Counting & Probability,general,B,1.0,0.9,B,0.85,B,0.95,B,0.9,A
Ministral-8B-Instruct-2410,test/counting_and_probability/666.json,Counting & Probability,general,B,1.0,0.95,B,1.0,B,1.0,B,1.0,B
Ministral-8B-Instruct-2410,test/counting_and_probability/134.json,Counting & Probability,general,B,1.0,0.95,B,0.8,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/counting_and_probability/119.json,Counting & Probability,general,A,1.0,0.9,A,0.6,A,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/counting_and_probability/1114.json,Counting & Probability,general,B,0.0,1.0,B,0.8,A,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/counting_and_probability/377.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/23957.json,Counting & Probability,general,B,0.0,0.95,B,0.7,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/counting_and_probability/1060.json,Counting & Probability,general,B,0.0,0.95,A,1.0,A,1.0,A,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/430.json,Counting & Probability,general,B,1.0,1.0,B,0.65,A,0.9,B,0.7,B
Ministral-8B-Instruct-2410,test/counting_and_probability/159.json,Counting & Probability,general,B,1.0,0.9,B,0.95,B,0.98,B,1.0,B
Ministral-8B-Instruct-2410,test/counting_and_probability/230.json,Counting & Probability,general,B,0.0,0.95,A,0.95,B,0.9,A,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/803.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.95,A,1.0,B
Ministral-8B-Instruct-2410,test/counting_and_probability/181.json,Counting & Probability,general,B,1.0,0.9,B,0.75,B,0.85,,,A
Ministral-8B-Instruct-2410,test/counting_and_probability/51.json,Counting & Probability,general,B,1.0,1.0,B,0.8,B,0.95,B,0.7,B
Ministral-8B-Instruct-2410,test/counting_and_probability/508.json,Counting & Probability,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/389.json,Counting & Probability,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/counting_and_probability/765.json,Counting & Probability,general,B,1.0,0.95,B,0.85,A,0.95,B,0.7,B
Ministral-8B-Instruct-2410,test/counting_and_probability/282.json,Counting & Probability,general,B,1.0,0.9,B,0.8,B,0.75,,,B
Ministral-8B-Instruct-2410,test/counting_and_probability/71.json,Counting & Probability,general,A,1.0,1.0,A,1.0,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/894.json,Counting & Probability,general,B,0.0,0.9,A,0.85,A,0.95,A,0.8,A
Ministral-8B-Instruct-2410,test/counting_and_probability/1009.json,Counting & Probability,general,B,1.0,0.9,B,0.9,B,0.95,B,0.95,A
Ministral-8B-Instruct-2410,test/counting_and_probability/913.json,Counting & Probability,general,B,0.0,0.9,B,0.85,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/25149.json,Counting & Probability,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/339.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Ministral-8B-Instruct-2410,test/counting_and_probability/870.json,Counting & Probability,general,A,0.0,0.9,B,0.85,B,0.85,B,0.6,A
Ministral-8B-Instruct-2410,test/counting_and_probability/216.json,Counting & Probability,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/737.json,Counting & Probability,general,B,0.0,1.0,A,1.0,A,1.0,A,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/116.json,Counting & Probability,general,B,0.0,1.0,A,0.85,A,0.85,A,0.8,A
Ministral-8B-Instruct-2410,test/counting_and_probability/238.json,Counting & Probability,general,A,0.0,0.85,B,1.0,B,0.95,B,1.0,B
Ministral-8B-Instruct-2410,test/counting_and_probability/1014.json,Counting & Probability,general,B,1.0,0.9,B,0.95,A,0.95,B,0.9,B
Ministral-8B-Instruct-2410,test/counting_and_probability/14.json,Counting & Probability,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
Ministral-8B-Instruct-2410,test/counting_and_probability/188.json,Counting & Probability,general,B,1.0,0.95,B,0.75,B,0.7,,,A
Ministral-8B-Instruct-2410,test/counting_and_probability/761.json,Counting & Probability,general,A,1.0,0.9,A,0.85,A,0.95,B,0.95,B
Ministral-8B-Instruct-2410,test/counting_and_probability/10.json,Counting & Probability,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/731.json,Counting & Probability,general,B,1.0,0.95,B,0.85,B,0.6,,,B
Ministral-8B-Instruct-2410,test/counting_and_probability/190.json,Counting & Probability,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,A
Ministral-8B-Instruct-2410,test/counting_and_probability/1003.json,Counting & Probability,general,B,1.0,1.0,A,0.7,B,0.85,B,0.7,B
Ministral-8B-Instruct-2410,test/counting_and_probability/199.json,Counting & Probability,general,A,1.0,0.95,B,0.7,A,0.75,A,0.6,B
Open-Reasoner-Zero-7B,test/precalculus/807.json,Precalculus,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/927.json,Precalculus,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/precalculus/1303.json,Precalculus,general,B,1.0,0.95,B,0.6,B,0.7,,,B
Open-Reasoner-Zero-7B,test/precalculus/990.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.99,A,1.0,A
Open-Reasoner-Zero-7B,test/precalculus/1199.json,Precalculus,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/779.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/285.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/1105.json,Precalculus,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/675.json,Precalculus,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/precalculus/1146.json,Precalculus,general,A,1.0,0.85,A,0.95,A,0.95,,,A
Open-Reasoner-Zero-7B,test/precalculus/1313.json,Precalculus,general,B,1.0,0.85,B,0.75,B,0.75,A,0.6,A
Open-Reasoner-Zero-7B,test/precalculus/24313.json,Precalculus,general,A,1.0,0.95,A,0.55,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/34.json,Precalculus,general,A,1.0,1.0,A,0.6,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/1300.json,Precalculus,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/44.json,Precalculus,general,A,1.0,0.95,A,0.55,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/477.json,Precalculus,general,B,1.0,1.0,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/43.json,Precalculus,general,A,0.0,1.0,A,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/986.json,Precalculus,general,B,0.0,0.95,B,0.85,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/precalculus/117.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/precalculus/697.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.85,A,0.9,A
Open-Reasoner-Zero-7B,test/precalculus/659.json,Precalculus,general,A,1.0,1.0,A,0.75,A,0.98,A,1.0,A
Open-Reasoner-Zero-7B,test/precalculus/263.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.99,A,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/541.json,Precalculus,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/190.json,Precalculus,general,A,0.0,0.75,B,0.85,B,0.85,B,0.7,A
Open-Reasoner-Zero-7B,test/precalculus/819.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.98,A,0.9,A
Open-Reasoner-Zero-7B,test/precalculus/1056.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/precalculus/441.json,Precalculus,general,A,1.0,1.0,A,0.9,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/989.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.85,A,0.9,B
Open-Reasoner-Zero-7B,test/precalculus/920.json,Precalculus,general,B,1.0,0.95,B,0.75,B,0.7,,,B
Open-Reasoner-Zero-7B,test/precalculus/452.json,Precalculus,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/580.json,Precalculus,general,A,0.0,0.95,A,0.9,B,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/precalculus/768.json,Precalculus,general,A,0.0,0.9,B,0.65,B,0.95,,,A
Open-Reasoner-Zero-7B,test/precalculus/1172.json,Precalculus,general,A,0.0,0.95,B,0.95,B,1.0,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/1201.json,Precalculus,general,B,1.0,0.85,B,0.65,B,0.6,,,A
Open-Reasoner-Zero-7B,test/precalculus/881.json,Precalculus,general,A,1.0,1.0,B,0.55,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/695.json,Precalculus,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/742.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,B
Open-Reasoner-Zero-7B,test/precalculus/801.json,Precalculus,general,B,1.0,0.95,B,0.85,B,0.7,B,0.65,B
Open-Reasoner-Zero-7B,test/precalculus/826.json,Precalculus,general,A,0.0,0.9,A,0.9,B,0.9,B,0.9,A
Open-Reasoner-Zero-7B,test/precalculus/1281.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,B,0.7,B
Open-Reasoner-Zero-7B,test/precalculus/96.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/1289.json,Precalculus,general,A,1.0,0.9,A,0.85,A,0.95,A,0.9,A
Open-Reasoner-Zero-7B,test/precalculus/902.json,Precalculus,general,B,1.0,0.85,B,0.65,B,0.95,,,A
Open-Reasoner-Zero-7B,test/precalculus/1291.json,Precalculus,general,A,1.0,1.0,A,0.85,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/398.json,Precalculus,general,A,0.0,0.95,B,0.8,A,0.98,B,0.9,B
Open-Reasoner-Zero-7B,test/precalculus/681.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/145.json,Precalculus,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/625.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/precalculus/1202.json,Precalculus,general,A,1.0,0.95,A,0.9,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/precalculus/1133.json,Precalculus,general,B,0.0,0.9,B,0.75,A,0.9,A,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/499.json,Precalculus,general,A,1.0,0.95,A,0.6,A,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/precalculus/323.json,Precalculus,general,A,0.0,0.95,B,0.8,A,0.98,B,0.75,A
Open-Reasoner-Zero-7B,test/precalculus/703.json,Precalculus,general,A,0.0,0.95,B,0.7,B,0.85,,,B
Open-Reasoner-Zero-7B,test/precalculus/1252.json,Precalculus,general,A,1.0,0.95,A,0.9,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/precalculus/1082.json,Precalculus,general,A,1.0,0.95,B,0.6,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/precalculus/356.json,Precalculus,general,A,1.0,0.95,B,0.8,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1994.json,Intermediate Algebra,general,A,0.0,1.0,B,0.6,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1197.json,Intermediate Algebra,general,A,1.0,0.95,A,0.7,A,0.9,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/134.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1000.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/607.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,B,0.98,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1388.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.85,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/428.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1454.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.7,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1217.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.9,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1168.json,Intermediate Algebra,general,A,1.0,0.95,A,0.8,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/956.json,Intermediate Algebra,general,A,0.0,0.95,B,0.7,B,0.7,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/279.json,Intermediate Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/207.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/623.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/47.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,A,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1849.json,Intermediate Algebra,general,A,0.0,0.95,B,0.55,B,0.7,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/2046.json,Intermediate Algebra,general,A,1.0,0.85,A,1.0,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/662.json,Intermediate Algebra,general,A,1.0,0.5,A,0.8,B,0.4,A,0.6,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/582.json,Intermediate Algebra,general,A,0.0,0.95,A,0.7,B,0.75,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/431.json,Intermediate Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/558.json,Intermediate Algebra,general,A,0.0,0.95,B,0.65,B,0.6,A,0.6,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/362.json,Intermediate Algebra,general,B,1.0,0.85,B,0.7,A,0.65,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/515.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/894.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,B,0.95,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/345.json,Intermediate Algebra,general,B,0.0,0.6,A,0.65,B,0.6,A,0.6,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1898.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.99,B,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/232.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/128.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1063.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.75,A,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1126.json,Intermediate Algebra,general,A,1.0,0.95,B,0.6,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/2022.json,Intermediate Algebra,general,B,1.0,0.85,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1151.json,Intermediate Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,0.0,0.9,B,0.6,B,0.7,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/966.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/964.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,A,0.85,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1410.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/991.json,Intermediate Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/183.json,Intermediate Algebra,general,A,0.0,0.9,B,0.6,B,0.7,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1422.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/2196.json,Intermediate Algebra,general,B,1.0,0.85,B,0.85,B,0.75,B,0.7,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/591.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1555.json,Intermediate Algebra,general,A,1.0,1.0,A,0.95,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1510.json,Intermediate Algebra,general,A,1.0,0.75,A,0.6,A,0.7,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/102.json,Intermediate Algebra,general,B,0.0,0.95,A,0.75,B,0.95,A,0.9,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/986.json,Intermediate Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1354.json,Intermediate Algebra,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1837.json,Intermediate Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/337.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1210.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.85,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1123.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/149.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1411.json,Intermediate Algebra,general,B,0.0,0.9,A,0.65,A,0.75,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/960.json,Intermediate Algebra,general,B,1.0,0.6,B,0.65,B,0.6,B,0.5,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1300.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/90.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.8,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/754.json,Intermediate Algebra,general,B,0.0,0.95,A,1.0,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/446.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1544.json,Intermediate Algebra,general,A,0.0,0.95,B,0.7,B,0.7,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1714.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/2152.json,Intermediate Algebra,general,A,0.0,0.9,B,0.95,A,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/117.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.9,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/190.json,Intermediate Algebra,general,B,1.0,0.9,B,0.7,B,0.7,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/776.json,Intermediate Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1566.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,A,0.98,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1572.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1166.json,Intermediate Algebra,general,B,1.0,0.95,B,0.75,B,0.6,B,0.6,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/860.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1407.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1405.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/690.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/955.json,Intermediate Algebra,general,A,0.0,0.95,B,0.65,B,0.6,B,0.6,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1992.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1111.json,Intermediate Algebra,general,A,1.0,1.0,A,0.75,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1791.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1806.json,Intermediate Algebra,general,A,1.0,0.85,B,0.6,A,0.85,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1797.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/2146.json,Intermediate Algebra,general,A,0.0,0.9,B,0.95,B,0.98,B,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/2015.json,Intermediate Algebra,general,B,1.0,0.85,B,0.6,B,0.85,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/121.json,Intermediate Algebra,general,A,0.0,1.0,A,0.85,B,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1014.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1462.json,Intermediate Algebra,general,A,1.0,0.6,A,0.7,B,0.6,A,0.7,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/199.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1779.json,Intermediate Algebra,general,A,0.0,0.85,B,0.7,B,0.9,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1102.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/834.json,Intermediate Algebra,general,A,0.0,0.95,B,0.55,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/158.json,Intermediate Algebra,general,B,1.0,0.95,B,0.9,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/752.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1279.json,Intermediate Algebra,general,A,1.0,0.75,A,0.6,B,0.6,A,0.6,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1467.json,Intermediate Algebra,general,A,0.0,0.7,B,0.65,B,0.7,,,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/101.json,Intermediate Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1365.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1350.json,Intermediate Algebra,general,A,1.0,0.85,A,1.0,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1930.json,Intermediate Algebra,general,B,1.0,0.85,B,0.85,B,0.85,,,A
Open-Reasoner-Zero-7B,test/intermediate_algebra/1981.json,Intermediate Algebra,general,A,1.0,0.95,A,0.55,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1232.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/intermediate_algebra/1508.json,Intermediate Algebra,general,A,0.0,1.0,B,0.6,B,0.95,,,A
Open-Reasoner-Zero-7B,test/algebra/2584.json,Algebra,general,A,1.0,1.0,A,0.9,B,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1349.json,Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Open-Reasoner-Zero-7B,test/algebra/2036.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1098.json,Algebra,general,A,1.0,1.0,B,0.8,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1837.json,Algebra,general,A,0.0,0.95,B,0.75,A,0.98,B,0.9,B
Open-Reasoner-Zero-7B,test/algebra/2193.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2427.json,Algebra,general,B,1.0,0.9,B,1.0,B,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/1072.json,Algebra,general,B,1.0,0.95,A,0.85,B,0.95,B,0.9,A
Open-Reasoner-Zero-7B,test/algebra/24.json,Algebra,general,A,1.0,0.95,A,0.85,B,0.95,A,0.9,B
Open-Reasoner-Zero-7B,test/algebra/2214.json,Algebra,general,A,0.0,0.95,B,0.65,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/305.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/1265.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/187.json,Algebra,general,A,1.0,0.9,A,0.6,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/769.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/722.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.9,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2046.json,Algebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2253.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.98,A,1.0,B
Open-Reasoner-Zero-7B,test/algebra/1004.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1035.json,Algebra,general,A,1.0,1.0,A,0.6,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2700.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/algebra/893.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/567.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/892.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2023.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/873.json,Algebra,general,A,1.0,0.95,B,0.51,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2058.json,Algebra,general,A,1.0,0.9,A,0.7,A,0.85,A,0.8,A
Open-Reasoner-Zero-7B,test/algebra/2593.json,Algebra,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2157.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2251.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1332.json,Algebra,general,A,1.0,1.0,A,0.55,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/972.json,Algebra,general,A,0.0,0.95,B,0.9,A,0.98,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2232.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/661.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/246.json,Algebra,general,A,0.0,1.0,B,0.95,A,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/1519.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/988.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.9,A
Open-Reasoner-Zero-7B,test/algebra/2570.json,Algebra,general,A,1.0,1.0,A,0.65,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/621.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1255.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2517.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/478.json,Algebra,general,A,1.0,1.0,B,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/297.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.9,,,A
Open-Reasoner-Zero-7B,test/algebra/841.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/686.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/351.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.98,A,1.0,B
Open-Reasoner-Zero-7B,test/algebra/1275.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1082.json,Algebra,general,A,0.0,1.0,B,0.9,B,0.98,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1214.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/2199.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/733.json,Algebra,general,A,0.0,0.5,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/109.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1937.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/291.json,Algebra,general,A,0.0,1.0,B,0.9,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2102.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/907.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/864.json,Algebra,general,A,0.0,0.95,B,0.6,B,0.98,B,1.0,B
Open-Reasoner-Zero-7B,test/algebra/2159.json,Algebra,general,A,1.0,0.95,A,0.65,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/algebra/1578.json,Algebra,general,A,1.0,1.0,A,0.75,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/975.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1143.json,Algebra,general,A,0.0,1.0,B,0.6,A,0.98,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2626.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1787.json,Algebra,general,A,0.0,0.9,B,1.0,B,1.0,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/1934.json,Algebra,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2064.json,Algebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,B
Open-Reasoner-Zero-7B,test/algebra/694.json,Algebra,general,A,0.0,1.0,B,0.55,A,1.0,B,0.9,A
Open-Reasoner-Zero-7B,test/algebra/524.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.98,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2551.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/346.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1282.json,Algebra,general,A,0.0,0.95,B,0.8,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1184.json,Algebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,A
Open-Reasoner-Zero-7B,test/algebra/634.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2486.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/algebra/2257.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1842.json,Algebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/791.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/276.json,Algebra,general,B,0.0,0.95,B,0.85,A,0.95,A,0.9,B
Open-Reasoner-Zero-7B,test/algebra/2735.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/425.json,Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.8,A
Open-Reasoner-Zero-7B,test/algebra/1936.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/algebra/2176.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/509.json,Algebra,general,B,1.0,0.9,B,0.95,B,0.98,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1457.json,Algebra,general,A,1.0,1.0,B,0.95,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/algebra/2592.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/algebra/858.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.9,A
Open-Reasoner-Zero-7B,test/algebra/1529.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1338.json,Algebra,general,A,1.0,1.0,A,0.8,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1547.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/529.json,Algebra,general,A,0.0,1.0,B,0.85,A,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/1078.json,Algebra,general,A,1.0,0.9,A,0.51,A,0.99,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/251.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/algebra/1199.json,Algebra,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2264.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1303.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/101.json,Algebra,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/170.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.9,B
Open-Reasoner-Zero-7B,test/algebra/849.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1031.json,Algebra,general,A,1.0,0.95,B,0.6,A,1.0,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/853.json,Algebra,general,A,1.0,0.95,B,0.55,A,0.98,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2277.json,Algebra,general,A,0.0,1.0,B,1.0,B,0.99,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/518.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/algebra/114.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1960.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2680.json,Algebra,general,A,1.0,1.0,B,0.6,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2391.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/776.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.98,A,1.0,B
Open-Reasoner-Zero-7B,test/algebra/1796.json,Algebra,general,A,1.0,0.95,B,0.55,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1339.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2743.json,Algebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2043.json,Algebra,general,A,1.0,1.0,A,0.8,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1553.json,Algebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2080.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/1343.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/algebra/668.json,Algebra,general,A,1.0,1.0,A,0.9,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2430.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2789.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/1814.json,Algebra,general,A,1.0,1.0,A,0.51,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/algebra/2476.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/2780.json,Algebra,general,A,0.0,0.95,B,0.75,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/824.json,Algebra,general,A,1.0,0.95,A,0.55,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/algebra/1425.json,Algebra,general,A,0.0,0.95,B,0.55,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/algebra/224.json,Algebra,general,A,0.0,1.0,B,0.85,B,0.95,B,0.9,A
Open-Reasoner-Zero-7B,test/algebra/435.json,Algebra,general,A,0.0,1.0,B,0.95,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/algebra/2470.json,Algebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/algebra/2779.json,Algebra,general,A,0.0,0.95,B,0.55,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/number_theory/572.json,Number Theory,general,A,0.0,1.0,B,0.65,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/515.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/1032.json,Number Theory,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/737.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/864.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.98,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/627.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/45.json,Number Theory,general,A,1.0,0.95,A,0.8,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/1055.json,Number Theory,general,B,1.0,0.9,B,0.95,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/46.json,Number Theory,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/516.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/357.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/914.json,Number Theory,general,A,1.0,1.0,A,0.8,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/847.json,Number Theory,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/753.json,Number Theory,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/1257.json,Number Theory,general,A,1.0,0.95,A,0.55,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/156.json,Number Theory,general,A,0.0,0.95,B,0.6,B,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/number_theory/612.json,Number Theory,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/931.json,Number Theory,general,A,1.0,1.0,B,0.51,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/521.json,Number Theory,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/598.json,Number Theory,general,B,0.0,0.95,A,0.95,A,0.85,A,0.9,A
Open-Reasoner-Zero-7B,test/number_theory/978.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/838.json,Number Theory,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/149.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/1201.json,Number Theory,general,A,1.0,1.0,B,0.55,A,1.0,A,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/234.json,Number Theory,general,A,1.0,0.95,A,0.65,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/417.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/89.json,Number Theory,general,A,1.0,1.0,B,0.85,A,0.98,A,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/183.json,Number Theory,general,B,1.0,0.9,B,0.95,B,0.98,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/1065.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/466.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/634.json,Number Theory,general,A,0.0,0.85,B,0.9,A,0.9,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/533.json,Number Theory,general,A,0.0,0.95,B,0.55,B,0.99,B,1.0,A
Open-Reasoner-Zero-7B,test/number_theory/691.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/1287.json,Number Theory,general,A,1.0,1.0,A,0.55,A,0.99,A,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/631.json,Number Theory,general,A,1.0,1.0,A,1.0,A,0.98,A,1.0,A
Open-Reasoner-Zero-7B,test/number_theory/488.json,Number Theory,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/1172.json,Number Theory,general,A,1.0,0.95,B,0.85,A,0.98,A,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/203.json,Number Theory,general,A,1.0,0.9,A,1.0,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/911.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/483.json,Number Theory,general,A,1.0,0.95,A,0.51,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/368.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/686.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/820.json,Number Theory,general,A,0.0,1.0,B,0.7,B,0.9,,,A
Open-Reasoner-Zero-7B,test/number_theory/109.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/427.json,Number Theory,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/1185.json,Number Theory,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/928.json,Number Theory,general,B,1.0,1.0,B,1.0,B,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/number_theory/132.json,Number Theory,general,A,0.0,0.85,B,0.85,A,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/number_theory/769.json,Number Theory,general,A,1.0,0.95,A,0.6,A,0.98,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/1002.json,Number Theory,general,A,1.0,0.95,B,0.55,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/410.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/255.json,Number Theory,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/1000.json,Number Theory,general,A,0.0,0.95,B,0.51,A,0.98,B,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/13.json,Number Theory,general,A,1.0,0.95,B,0.55,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/459.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/number_theory/342.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/679.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/72.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/22.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/1128.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/number_theory/1090.json,Number Theory,general,A,0.0,0.95,B,0.85,A,0.99,B,1.0,B
Open-Reasoner-Zero-7B,test/number_theory/239.json,Number Theory,general,A,0.0,1.0,B,0.95,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1622.json,Prealgebra,general,A,1.0,0.95,B,0.55,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1139.json,Prealgebra,general,A,1.0,0.8,A,0.85,A,0.6,,,B
Open-Reasoner-Zero-7B,test/prealgebra/1840.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1302.json,Prealgebra,general,B,1.0,0.95,A,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/930.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1558.json,Prealgebra,general,A,0.0,1.0,B,0.85,B,0.99,B,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/1388.json,Prealgebra,general,A,1.0,1.0,B,0.75,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/951.json,Prealgebra,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/572.json,Prealgebra,general,A,0.0,0.95,A,0.85,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/1247.json,Prealgebra,general,A,0.0,1.0,B,0.9,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1747.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/prealgebra/1233.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/192.json,Prealgebra,general,A,1.0,1.0,B,0.75,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/307.json,Prealgebra,general,A,0.0,0.95,A,0.95,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/prealgebra/1761.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.6,A
Open-Reasoner-Zero-7B,test/prealgebra/1646.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.85,,,B
Open-Reasoner-Zero-7B,test/prealgebra/105.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.98,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1924.json,Prealgebra,general,A,0.0,1.0,A,0.75,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1804.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/1733.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.7,A,0.6,A
Open-Reasoner-Zero-7B,test/prealgebra/505.json,Prealgebra,general,A,1.0,1.0,A,0.9,A,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/prealgebra/1686.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/1807.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1297.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1655.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1356.json,Prealgebra,general,A,1.0,0.95,A,1.0,A,1.0,A,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/1003.json,Prealgebra,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1272.json,Prealgebra,general,A,0.0,1.0,B,0.8,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1113.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.99,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1908.json,Prealgebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/prealgebra/1922.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1907.json,Prealgebra,general,A,1.0,0.95,B,0.9,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/2086.json,Prealgebra,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/378.json,Prealgebra,general,A,0.0,0.95,B,1.0,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/1555.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/prealgebra/1436.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1961.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,1.0,A,1.0,B
Open-Reasoner-Zero-7B,test/prealgebra/2057.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.98,A,0.9,A
Open-Reasoner-Zero-7B,test/prealgebra/153.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/prealgebra/874.json,Prealgebra,general,A,0.0,1.0,B,0.85,A,0.6,,,B
Open-Reasoner-Zero-7B,test/prealgebra/1251.json,Prealgebra,general,A,1.0,1.0,B,0.55,A,0.98,A,1.0,B
Open-Reasoner-Zero-7B,test/prealgebra/1458.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1995.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1317.json,Prealgebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1742.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/993.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.98,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1834.json,Prealgebra,general,A,1.0,0.9,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1512.json,Prealgebra,general,A,1.0,0.95,A,0.7,A,1.0,,,B
Open-Reasoner-Zero-7B,test/prealgebra/260.json,Prealgebra,general,B,1.0,0.95,B,0.8,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1787.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.9,B
Open-Reasoner-Zero-7B,test/prealgebra/1044.json,Prealgebra,general,B,1.0,0.95,B,0.55,B,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/465.json,Prealgebra,general,A,0.0,0.9,B,0.9,B,0.85,B,0.8,B
Open-Reasoner-Zero-7B,test/prealgebra/1423.json,Prealgebra,general,A,1.0,0.95,A,0.75,A,0.95,B,0.9,A
Open-Reasoner-Zero-7B,test/prealgebra/954.json,Prealgebra,general,A,0.0,1.0,B,0.55,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1973.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1730.json,Prealgebra,general,A,0.0,1.0,B,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1238.json,Prealgebra,general,A,0.0,0.95,B,0.6,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1353.json,Prealgebra,general,A,1.0,1.0,A,1.0,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/1187.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,,,B
Open-Reasoner-Zero-7B,test/prealgebra/1743.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1865.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1298.json,Prealgebra,general,A,1.0,0.85,B,0.85,A,0.95,A,0.9,A
Open-Reasoner-Zero-7B,test/prealgebra/2066.json,Prealgebra,general,A,0.0,1.0,B,0.9,B,0.98,B,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/631.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/977.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1991.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.9,B
Open-Reasoner-Zero-7B,test/prealgebra/1784.json,Prealgebra,general,A,1.0,0.95,A,0.6,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1572.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.98,A,0.9,B
Open-Reasoner-Zero-7B,test/prealgebra/65.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1227.json,Prealgebra,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/2019.json,Prealgebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1640.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/2037.json,Prealgebra,general,A,1.0,0.95,A,0.75,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/996.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/805.json,Prealgebra,general,A,1.0,1.0,A,0.75,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/914.json,Prealgebra,general,A,1.0,0.95,B,0.8,A,0.85,,,B
Open-Reasoner-Zero-7B,test/prealgebra/1114.json,Prealgebra,general,B,1.0,0.9,B,1.0,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/prealgebra/846.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1930.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/prealgebra/1252.json,Prealgebra,general,A,0.0,0.95,B,0.9,A,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/prealgebra/1203.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/prealgebra/1128.json,Prealgebra,general,A,1.0,1.0,A,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/geometry/248.json,Geometry,general,A,1.0,1.0,A,0.85,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/geometry/434.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/geometry/967.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/geometry/627.json,Geometry,general,A,1.0,0.85,A,0.9,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/geometry/178.json,Geometry,general,A,1.0,1.0,A,0.75,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/geometry/456.json,Geometry,general,A,1.0,1.0,B,0.51,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/geometry/353.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/geometry/183.json,Geometry,general,A,0.0,0.95,B,0.65,A,0.5,,,B
Open-Reasoner-Zero-7B,test/geometry/283.json,Geometry,general,A,1.0,1.0,A,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/geometry/1140.json,Geometry,general,B,1.0,0.85,B,0.95,B,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/geometry/172.json,Geometry,general,B,1.0,0.95,B,0.98,B,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/geometry/880.json,Geometry,general,A,1.0,0.95,B,0.75,A,0.85,,,B
Open-Reasoner-Zero-7B,test/geometry/802.json,Geometry,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,A
Open-Reasoner-Zero-7B,test/geometry/65.json,Geometry,general,A,0.0,0.85,B,0.85,A,0.65,B,0.7,B
Open-Reasoner-Zero-7B,test/geometry/702.json,Geometry,general,B,1.0,0.9,B,0.95,B,0.95,B,0.7,A
Open-Reasoner-Zero-7B,test/geometry/221.json,Geometry,general,A,1.0,0.95,A,0.7,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/geometry/547.json,Geometry,general,B,1.0,0.85,B,0.7,B,0.85,,,B
Open-Reasoner-Zero-7B,test/geometry/229.json,Geometry,general,A,1.0,0.9,A,0.85,A,0.85,,,B
Open-Reasoner-Zero-7B,test/geometry/254.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/geometry/473.json,Geometry,general,A,1.0,1.0,B,0.85,A,0.98,A,0.95,A
Open-Reasoner-Zero-7B,test/geometry/347.json,Geometry,general,A,0.0,0.95,B,0.8,A,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/geometry/483.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/geometry/826.json,Geometry,general,A,0.0,0.95,B,0.55,B,0.6,A,0.6,B
Open-Reasoner-Zero-7B,test/geometry/226.json,Geometry,general,A,1.0,0.95,B,0.9,A,0.95,A,0.9,B
Open-Reasoner-Zero-7B,test/geometry/686.json,Geometry,general,B,1.0,0.95,B,0.85,B,0.85,B,0.8,B
Open-Reasoner-Zero-7B,test/geometry/1097.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/geometry/965.json,Geometry,general,A,0.0,0.95,B,0.7,B,0.7,B,0.7,B
Open-Reasoner-Zero-7B,test/geometry/711.json,Geometry,general,B,1.0,0.95,B,0.8,B,0.75,,,B
Open-Reasoner-Zero-7B,test/geometry/1108.json,Geometry,general,A,1.0,0.95,A,0.9,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/geometry/947.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/geometry/465.json,Geometry,general,B,1.0,0.9,B,0.95,A,0.95,B,0.9,A
Open-Reasoner-Zero-7B,test/geometry/73.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/geometry/106.json,Geometry,general,A,0.0,1.0,B,0.9,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/geometry/846.json,Geometry,general,B,1.0,0.9,B,0.7,B,0.6,,,A
Open-Reasoner-Zero-7B,test/geometry/538.json,Geometry,general,A,0.0,1.0,B,0.85,A,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/geometry/795.json,Geometry,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/geometry/817.json,Geometry,general,A,0.0,0.95,B,0.7,B,0.95,,,A
Open-Reasoner-Zero-7B,test/geometry/843.json,Geometry,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/geometry/477.json,Geometry,general,A,0.0,1.0,B,0.92,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/geometry/561.json,Geometry,general,A,0.0,0.95,B,0.65,B,0.65,,,A
Open-Reasoner-Zero-7B,test/geometry/615.json,Geometry,general,A,1.0,0.95,A,0.55,B,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/counting_and_probability/525.json,Counting & Probability,general,A,1.0,1.0,B,0.75,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/counting_and_probability/666.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/134.json,Counting & Probability,general,A,1.0,1.0,B,0.8,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/counting_and_probability/119.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.95,B,0.9,A
Open-Reasoner-Zero-7B,test/counting_and_probability/1114.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/377.json,Counting & Probability,general,A,0.0,1.0,B,0.95,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/23957.json,Counting & Probability,general,A,1.0,0.95,B,0.6,A,0.95,A,1.0,B
Open-Reasoner-Zero-7B,test/counting_and_probability/1060.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.9,B,0.65,A
Open-Reasoner-Zero-7B,test/counting_and_probability/430.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.85,A,0.7,B
Open-Reasoner-Zero-7B,test/counting_and_probability/159.json,Counting & Probability,general,A,1.0,0.95,B,0.55,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/counting_and_probability/230.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.95,B,0.9,B
Open-Reasoner-Zero-7B,test/counting_and_probability/803.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/counting_and_probability/181.json,Counting & Probability,general,B,1.0,0.4,B,0.75,A,0.6,,,A
Open-Reasoner-Zero-7B,test/counting_and_probability/51.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,B
Open-Reasoner-Zero-7B,test/counting_and_probability/508.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,1.0,A
Open-Reasoner-Zero-7B,test/counting_and_probability/389.json,Counting & Probability,general,A,1.0,0.95,B,0.55,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/765.json,Counting & Probability,general,A,1.0,0.85,B,0.65,A,0.95,,,B
Open-Reasoner-Zero-7B,test/counting_and_probability/282.json,Counting & Probability,general,A,1.0,1.0,A,0.7,B,0.65,,,A
Open-Reasoner-Zero-7B,test/counting_and_probability/71.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/894.json,Counting & Probability,general,A,1.0,1.0,A,0.85,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/counting_and_probability/1009.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/913.json,Counting & Probability,general,A,1.0,1.0,B,0.85,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/25149.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/339.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/870.json,Counting & Probability,general,B,1.0,0.95,B,0.65,B,0.9,B,0.8,A
Open-Reasoner-Zero-7B,test/counting_and_probability/216.json,Counting & Probability,general,A,0.0,0.95,B,0.95,A,0.95,B,1.0,B
Open-Reasoner-Zero-7B,test/counting_and_probability/737.json,Counting & Probability,general,A,1.0,1.0,A,0.85,A,0.98,A,1.0,B
Open-Reasoner-Zero-7B,test/counting_and_probability/116.json,Counting & Probability,general,A,1.0,1.0,B,0.55,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/counting_and_probability/238.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.9,B,0.7,B
Open-Reasoner-Zero-7B,test/counting_and_probability/1014.json,Counting & Probability,general,A,1.0,1.0,A,0.95,A,0.98,A,1.0,B
Open-Reasoner-Zero-7B,test/counting_and_probability/14.json,Counting & Probability,general,A,1.0,0.95,A,0.6,A,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/counting_and_probability/188.json,Counting & Probability,general,A,0.0,0.95,B,0.6,B,0.75,,,B
Open-Reasoner-Zero-7B,test/counting_and_probability/761.json,Counting & Probability,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
Open-Reasoner-Zero-7B,test/counting_and_probability/10.json,Counting & Probability,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
Open-Reasoner-Zero-7B,test/counting_and_probability/731.json,Counting & Probability,general,B,1.0,0.95,B,0.85,B,0.95,,,B
Open-Reasoner-Zero-7B,test/counting_and_probability/190.json,Counting & Probability,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,B
Open-Reasoner-Zero-7B,test/counting_and_probability/1003.json,Counting & Probability,general,A,1.0,0.85,A,0.7,A,0.85,,,B
Open-Reasoner-Zero-7B,test/counting_and_probability/199.json,Counting & Probability,general,A,0.0,0.95,B,1.0,B,0.9,B,1.0,A
s1.1-7B,test/precalculus/807.json,Precalculus,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/927.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/1303.json,Precalculus,general,A,1.0,0.95,A,1.0,A,0.98,A,1.0,B
s1.1-7B,test/precalculus/990.json,Precalculus,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/1199.json,Precalculus,general,B,1.0,0.75,A,0.6,B,0.65,B,0.95,A
s1.1-7B,test/precalculus/779.json,Precalculus,general,A,0.0,0.95,B,0.8,B,0.95,A,0.95,B
s1.1-7B,test/precalculus/285.json,Precalculus,general,A,0.0,0.95,B,0.7,B,0.75,,,A
s1.1-7B,test/precalculus/1105.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/675.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/1146.json,Precalculus,general,B,1.0,0.9,B,0.8,B,0.85,A,0.85,B
s1.1-7B,test/precalculus/1313.json,Precalculus,general,B,1.0,0.85,B,0.85,B,0.9,B,0.95,B
s1.1-7B,test/precalculus/24313.json,Precalculus,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,A
s1.1-7B,test/precalculus/34.json,Precalculus,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/1300.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/44.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.9,A,0.9,B
s1.1-7B,test/precalculus/477.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/precalculus/43.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.98,B,0.95,B
s1.1-7B,test/precalculus/986.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/precalculus/117.json,Precalculus,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/697.json,Precalculus,general,A,0.0,0.95,B,0.65,B,0.85,B,0.85,A
s1.1-7B,test/precalculus/659.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,1.0,B
s1.1-7B,test/precalculus/263.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/541.json,Precalculus,general,A,0.0,0.95,B,0.8,B,0.95,A,1.0,B
s1.1-7B,test/precalculus/190.json,Precalculus,general,B,1.0,0.85,B,0.75,B,0.85,B,0.9,A
s1.1-7B,test/precalculus/819.json,Precalculus,general,A,1.0,0.85,A,1.0,A,0.95,A,1.0,A
s1.1-7B,test/precalculus/1056.json,Precalculus,general,A,0.0,0.95,A,0.65,B,0.85,,,B
s1.1-7B,test/precalculus/441.json,Precalculus,general,A,1.0,0.95,A,0.95,B,0.95,A,0.95,A
s1.1-7B,test/precalculus/989.json,Precalculus,general,B,1.0,0.85,B,0.65,B,0.9,B,0.85,B
s1.1-7B,test/precalculus/920.json,Precalculus,general,A,1.0,0.85,A,0.95,B,0.85,A,0.9,B
s1.1-7B,test/precalculus/452.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/precalculus/580.json,Precalculus,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,B
s1.1-7B,test/precalculus/768.json,Precalculus,general,B,1.0,0.85,B,0.9,B,0.92,B,0.95,A
s1.1-7B,test/precalculus/1172.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/1201.json,Precalculus,general,B,1.0,0.9,B,0.65,B,0.85,,,B
s1.1-7B,test/precalculus/881.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/precalculus/695.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/precalculus/742.json,Precalculus,general,B,1.0,0.95,B,0.9,B,0.95,B,0.95,A
s1.1-7B,test/precalculus/801.json,Precalculus,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/precalculus/826.json,Precalculus,general,B,1.0,0.9,B,0.85,B,0.9,B,0.9,B
s1.1-7B,test/precalculus/1281.json,Precalculus,general,B,1.0,0.85,B,0.85,B,0.9,B,0.9,A
s1.1-7B,test/precalculus/96.json,Precalculus,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/precalculus/1289.json,Precalculus,general,A,1.0,0.95,A,0.85,A,0.95,B,0.95,B
s1.1-7B,test/precalculus/902.json,Precalculus,general,A,1.0,0.9,A,0.7,A,0.9,A,0.7,B
s1.1-7B,test/precalculus/1291.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/precalculus/398.json,Precalculus,general,A,1.0,0.95,A,0.9,A,0.95,B,0.95,B
s1.1-7B,test/precalculus/681.json,Precalculus,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
s1.1-7B,test/precalculus/145.json,Precalculus,general,B,1.0,0.9,B,0.95,B,0.85,B,0.9,A
s1.1-7B,test/precalculus/625.json,Precalculus,general,A,0.0,0.9,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/precalculus/1202.json,Precalculus,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
s1.1-7B,test/precalculus/1133.json,Precalculus,general,B,1.0,0.95,B,0.85,B,0.95,B,0.95,A
s1.1-7B,test/precalculus/499.json,Precalculus,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/precalculus/323.json,Precalculus,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
s1.1-7B,test/precalculus/703.json,Precalculus,general,A,0.0,0.85,B,0.85,B,0.9,,,B
s1.1-7B,test/precalculus/1252.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
s1.1-7B,test/precalculus/1082.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/precalculus/356.json,Precalculus,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/1994.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/1197.json,Intermediate Algebra,general,Tie,0.5,0.5,A,0.6,B,0.65,A,0.95,A
s1.1-7B,test/intermediate_algebra/134.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/1000.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.75,B,0.95,B
s1.1-7B,test/intermediate_algebra/607.json,Intermediate Algebra,general,B,0.0,0.85,A,0.8,A,0.9,A,1.0,B
s1.1-7B,test/intermediate_algebra/1388.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,A,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/428.json,Intermediate Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/1454.json,Intermediate Algebra,general,B,1.0,0.75,B,0.85,B,0.85,A,0.7,B
s1.1-7B,test/intermediate_algebra/1217.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/1168.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/956.json,Intermediate Algebra,general,B,1.0,0.85,B,0.85,B,0.6,B,0.6,B
s1.1-7B,test/intermediate_algebra/1247.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/279.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.95,B,1.0,A
s1.1-7B,test/intermediate_algebra/207.json,Intermediate Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/623.json,Intermediate Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/47.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.92,A,0.95,A
s1.1-7B,test/intermediate_algebra/1849.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.65,A,0.9,A
s1.1-7B,test/intermediate_algebra/2046.json,Intermediate Algebra,general,B,1.0,0.85,B,0.95,B,0.85,B,0.8,B
s1.1-7B,test/intermediate_algebra/662.json,Intermediate Algebra,general,B,1.0,0.85,B,0.75,B,0.85,B,0.95,B
s1.1-7B,test/intermediate_algebra/582.json,Intermediate Algebra,general,B,1.0,0.9,B,0.85,B,0.95,,,A
s1.1-7B,test/intermediate_algebra/431.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/558.json,Intermediate Algebra,general,A,0.0,0.95,B,0.65,B,0.7,B,0.9,A
s1.1-7B,test/intermediate_algebra/362.json,Intermediate Algebra,general,B,1.0,0.85,B,0.75,B,0.65,B,0.95,B
s1.1-7B,test/intermediate_algebra/515.json,Intermediate Algebra,general,B,0.0,0.95,B,0.9,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/894.json,Intermediate Algebra,general,B,0.0,0.4,A,0.85,A,0.75,A,0.7,A
s1.1-7B,test/intermediate_algebra/345.json,Intermediate Algebra,general,B,1.0,0.85,A,0.65,B,0.85,B,0.95,B
s1.1-7B,test/intermediate_algebra/1898.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/232.json,Intermediate Algebra,general,A,0.0,0.95,B,0.75,B,0.8,A,0.9,A
s1.1-7B,test/intermediate_algebra/128.json,Intermediate Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
s1.1-7B,test/intermediate_algebra/1063.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.65,B,0.9,B
s1.1-7B,test/intermediate_algebra/1126.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/2022.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.75,B,0.8,B
s1.1-7B,test/intermediate_algebra/1151.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/1408.json,Intermediate Algebra,general,A,0.0,0.85,B,0.95,A,0.95,B,0.75,A
s1.1-7B,test/intermediate_algebra/966.json,Intermediate Algebra,general,A,0.0,1.0,B,0.85,B,0.92,,,A
s1.1-7B,test/intermediate_algebra/964.json,Intermediate Algebra,general,B,1.0,0.95,B,0.7,B,0.85,B,1.0,A
s1.1-7B,test/intermediate_algebra/1410.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/991.json,Intermediate Algebra,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/183.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.85,A,1.0,B
s1.1-7B,test/intermediate_algebra/1422.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.9,B
s1.1-7B,test/intermediate_algebra/2196.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/591.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
s1.1-7B,test/intermediate_algebra/1555.json,Intermediate Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,1.0,A
s1.1-7B,test/intermediate_algebra/1510.json,Intermediate Algebra,general,A,1.0,0.7,A,0.9,A,0.65,A,0.9,A
s1.1-7B,test/intermediate_algebra/102.json,Intermediate Algebra,general,B,1.0,0.9,B,1.0,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/986.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/1354.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/1837.json,Intermediate Algebra,general,B,1.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/337.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/1210.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,B,0.95,A,1.0,B
s1.1-7B,test/intermediate_algebra/1123.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/149.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/1411.json,Intermediate Algebra,general,B,1.0,0.85,B,0.9,B,0.85,B,0.8,B
s1.1-7B,test/intermediate_algebra/960.json,Intermediate Algebra,general,A,0.0,0.85,B,0.6,B,0.75,B,0.9,B
s1.1-7B,test/intermediate_algebra/1300.json,Intermediate Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/90.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,1.0,A
s1.1-7B,test/intermediate_algebra/754.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.85,B,0.9,A
s1.1-7B,test/intermediate_algebra/446.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/1544.json,Intermediate Algebra,general,B,0.0,0.85,A,0.85,B,0.95,A,0.9,A
s1.1-7B,test/intermediate_algebra/1714.json,Intermediate Algebra,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/2152.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
s1.1-7B,test/intermediate_algebra/117.json,Intermediate Algebra,general,B,1.0,0.85,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/190.json,Intermediate Algebra,general,A,1.0,0.85,A,0.65,B,0.85,A,0.65,B
s1.1-7B,test/intermediate_algebra/776.json,Intermediate Algebra,general,A,1.0,0.85,A,0.9,A,0.7,A,0.95,A
s1.1-7B,test/intermediate_algebra/1566.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/1572.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/1166.json,Intermediate Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/860.json,Intermediate Algebra,general,B,1.0,0.85,B,1.0,B,0.95,B,1.0,B
s1.1-7B,test/intermediate_algebra/1407.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/1405.json,Intermediate Algebra,general,B,1.0,0.85,B,0.75,B,0.65,B,0.7,B
s1.1-7B,test/intermediate_algebra/690.json,Intermediate Algebra,general,A,0.0,0.95,B,0.7,B,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/955.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.9,A,0.95,B
s1.1-7B,test/intermediate_algebra/1992.json,Intermediate Algebra,general,A,0.0,0.95,B,1.0,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/1111.json,Intermediate Algebra,general,A,1.0,1.0,B,0.9,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/1791.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.98,A,0.95,A
s1.1-7B,test/intermediate_algebra/1806.json,Intermediate Algebra,general,B,1.0,0.85,B,0.65,B,0.9,B,0.95,A
s1.1-7B,test/intermediate_algebra/1797.json,Intermediate Algebra,general,B,0.0,0.95,A,1.0,A,0.98,A,0.95,A
s1.1-7B,test/intermediate_algebra/2146.json,Intermediate Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/2015.json,Intermediate Algebra,general,A,0.0,0.85,B,0.8,B,0.85,B,0.7,B
s1.1-7B,test/intermediate_algebra/121.json,Intermediate Algebra,general,B,1.0,0.9,B,0.9,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/1014.json,Intermediate Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/intermediate_algebra/1462.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/199.json,Intermediate Algebra,general,B,0.0,0.95,A,0.85,B,0.75,A,0.8,A
s1.1-7B,test/intermediate_algebra/1779.json,Intermediate Algebra,general,A,0.0,0.85,B,0.8,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/1102.json,Intermediate Algebra,general,B,1.0,0.95,B,0.98,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/834.json,Intermediate Algebra,general,B,1.0,0.95,B,0.9,B,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/158.json,Intermediate Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,B
s1.1-7B,test/intermediate_algebra/752.json,Intermediate Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/1279.json,Intermediate Algebra,general,A,0.0,0.95,A,0.55,B,0.9,B,0.9,B
s1.1-7B,test/intermediate_algebra/1467.json,Intermediate Algebra,general,A,0.0,0.95,B,0.65,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/101.json,Intermediate Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/1365.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/intermediate_algebra/1350.json,Intermediate Algebra,general,A,1.0,0.75,A,0.7,A,0.85,B,0.95,A
s1.1-7B,test/intermediate_algebra/1930.json,Intermediate Algebra,general,B,1.0,0.85,B,0.85,B,0.95,B,0.95,A
s1.1-7B,test/intermediate_algebra/1981.json,Intermediate Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
s1.1-7B,test/intermediate_algebra/1232.json,Intermediate Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,1.0,B
s1.1-7B,test/intermediate_algebra/1508.json,Intermediate Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/algebra/2584.json,Algebra,general,A,1.0,0.9,A,0.85,B,0.95,A,0.95,B
s1.1-7B,test/algebra/1349.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.98,A,0.95,A
s1.1-7B,test/algebra/2036.json,Algebra,general,A,0.0,0.95,A,0.95,B,0.95,B,0.95,B
s1.1-7B,test/algebra/1098.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,1.0,B
s1.1-7B,test/algebra/1837.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/2193.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/2427.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/algebra/1072.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,A
s1.1-7B,test/algebra/24.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/2214.json,Algebra,general,A,0.0,0.95,B,0.8,B,0.95,A,1.0,B
s1.1-7B,test/algebra/305.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.98,A,0.95,B
s1.1-7B,test/algebra/1265.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,B
s1.1-7B,test/algebra/187.json,Algebra,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
s1.1-7B,test/algebra/769.json,Algebra,general,B,0.0,0.95,B,0.85,A,0.95,A,0.95,A
s1.1-7B,test/algebra/722.json,Algebra,general,A,1.0,0.95,A,0.98,A,0.9,A,0.95,A
s1.1-7B,test/algebra/2046.json,Algebra,general,A,0.0,1.0,B,0.9,B,0.98,A,0.95,B
s1.1-7B,test/algebra/2253.json,Algebra,general,B,0.0,0.95,B,0.9,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1004.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,B
s1.1-7B,test/algebra/1035.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
s1.1-7B,test/algebra/2700.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.9,A,0.95,A
s1.1-7B,test/algebra/893.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/algebra/567.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.98,A,0.95,A
s1.1-7B,test/algebra/892.json,Algebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/algebra/2023.json,Algebra,general,A,0.0,0.95,B,0.65,B,0.95,A,0.95,B
s1.1-7B,test/algebra/873.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/2058.json,Algebra,general,B,1.0,0.85,B,0.85,B,0.95,B,0.9,A
s1.1-7B,test/algebra/2593.json,Algebra,general,A,0.0,0.95,B,0.75,B,0.95,A,0.95,A
s1.1-7B,test/algebra/2157.json,Algebra,general,A,1.0,0.95,A,0.98,B,0.92,A,0.95,A
s1.1-7B,test/algebra/2251.json,Algebra,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1332.json,Algebra,general,A,0.0,0.88,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/algebra/972.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.98,A,0.95,B
s1.1-7B,test/algebra/2232.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/algebra/661.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
s1.1-7B,test/algebra/246.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1519.json,Algebra,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/988.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/2570.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/algebra/621.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,1.0,B
s1.1-7B,test/algebra/1255.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/2517.json,Algebra,general,B,0.0,0.6,A,0.95,A,0.75,A,1.0,B
s1.1-7B,test/algebra/478.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,A
s1.1-7B,test/algebra/297.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/algebra/841.json,Algebra,general,A,0.0,0.85,A,0.95,B,0.98,B,0.95,A
s1.1-7B,test/algebra/686.json,Algebra,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,A
s1.1-7B,test/algebra/351.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1275.json,Algebra,general,A,0.0,0.9,B,0.75,B,0.9,A,0.95,B
s1.1-7B,test/algebra/1082.json,Algebra,general,A,0.0,0.9,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/algebra/1214.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,1.0,B
s1.1-7B,test/algebra/2199.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/algebra/733.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
s1.1-7B,test/algebra/109.json,Algebra,general,A,1.0,0.9,A,0.95,A,0.95,B,0.95,A
s1.1-7B,test/algebra/1937.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.9,A
s1.1-7B,test/algebra/291.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/algebra/2102.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,1.0,A
s1.1-7B,test/algebra/907.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/algebra/864.json,Algebra,general,B,0.0,0.95,A,0.95,B,0.98,A,0.95,A
s1.1-7B,test/algebra/2159.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/algebra/1578.json,Algebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/975.json,Algebra,general,A,1.0,0.95,A,0.75,A,0.95,B,0.95,A
s1.1-7B,test/algebra/1143.json,Algebra,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,B
s1.1-7B,test/algebra/2626.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.9,A
s1.1-7B,test/algebra/1787.json,Algebra,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1934.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,1.0,B
s1.1-7B,test/algebra/2064.json,Algebra,general,B,0.0,0.95,B,0.55,A,0.95,A,0.95,A
s1.1-7B,test/algebra/694.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/algebra/524.json,Algebra,general,A,0.0,0.95,B,0.75,B,0.98,B,0.95,B
s1.1-7B,test/algebra/2551.json,Algebra,general,A,0.0,0.95,B,0.75,B,0.95,B,0.95,B
s1.1-7B,test/algebra/346.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/algebra/1282.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
s1.1-7B,test/algebra/1184.json,Algebra,general,A,0.0,1.0,B,1.0,B,0.95,B,1.0,A
s1.1-7B,test/algebra/634.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,A
s1.1-7B,test/algebra/2486.json,Algebra,general,B,1.0,0.85,B,0.95,A,0.95,B,0.95,B
s1.1-7B,test/algebra/2257.json,Algebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1842.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,A
s1.1-7B,test/algebra/791.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/276.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
s1.1-7B,test/algebra/2735.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/425.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.98,A,0.95,B
s1.1-7B,test/algebra/1936.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,B,0.95,A
s1.1-7B,test/algebra/2176.json,Algebra,general,B,1.0,0.75,B,0.85,B,0.9,B,0.95,A
s1.1-7B,test/algebra/509.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/algebra/1457.json,Algebra,general,A,1.0,0.95,A,0.98,A,0.95,A,0.95,A
s1.1-7B,test/algebra/2592.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/858.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/algebra/1529.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
s1.1-7B,test/algebra/1338.json,Algebra,general,B,0.0,0.95,A,0.95,A,0.92,A,0.95,B
s1.1-7B,test/algebra/1547.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/529.json,Algebra,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1078.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
s1.1-7B,test/algebra/251.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1199.json,Algebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/algebra/2264.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/algebra/1303.json,Algebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/algebra/101.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/170.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/algebra/849.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
s1.1-7B,test/algebra/1031.json,Algebra,general,A,0.0,0.95,B,0.85,A,0.95,B,1.0,A
s1.1-7B,test/algebra/853.json,Algebra,general,A,1.0,0.95,A,0.98,A,0.98,A,0.95,A
s1.1-7B,test/algebra/2277.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/algebra/518.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/114.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/algebra/1960.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/2680.json,Algebra,general,A,1.0,0.95,A,0.85,B,0.98,A,0.95,A
s1.1-7B,test/algebra/2391.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/algebra/776.json,Algebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
s1.1-7B,test/algebra/1796.json,Algebra,general,A,1.0,0.85,A,0.9,A,0.95,A,1.0,A
s1.1-7B,test/algebra/1339.json,Algebra,general,B,1.0,0.85,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/algebra/2743.json,Algebra,general,A,0.0,0.95,A,0.85,B,0.95,B,0.95,B
s1.1-7B,test/algebra/2043.json,Algebra,general,A,1.0,0.95,A,0.65,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1553.json,Algebra,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,A
s1.1-7B,test/algebra/2080.json,Algebra,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/algebra/1343.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.98,A,0.9,B
s1.1-7B,test/algebra/668.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.98,B,1.0,A
s1.1-7B,test/algebra/2430.json,Algebra,general,A,1.0,0.95,A,0.95,B,0.95,A,0.95,A
s1.1-7B,test/algebra/2789.json,Algebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,A
s1.1-7B,test/algebra/1814.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/algebra/2476.json,Algebra,general,B,0.0,0.95,A,0.9,A,0.92,B,0.95,A
s1.1-7B,test/algebra/2780.json,Algebra,general,B,1.0,0.95,B,0.85,B,0.95,A,0.95,B
s1.1-7B,test/algebra/824.json,Algebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
s1.1-7B,test/algebra/1425.json,Algebra,general,A,0.0,0.95,B,0.65,B,0.95,A,0.95,A
s1.1-7B,test/algebra/224.json,Algebra,general,A,1.0,0.95,B,0.9,A,0.98,A,0.95,B
s1.1-7B,test/algebra/435.json,Algebra,general,A,1.0,0.95,B,0.75,A,0.95,A,0.95,B
s1.1-7B,test/algebra/2470.json,Algebra,general,A,1.0,0.95,A,0.95,A,0.98,A,1.0,B
s1.1-7B,test/algebra/2779.json,Algebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
s1.1-7B,test/number_theory/572.json,Number Theory,general,A,1.0,0.85,A,0.95,B,0.95,A,0.9,B
s1.1-7B,test/number_theory/515.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/number_theory/1032.json,Number Theory,general,A,1.0,0.95,B,0.65,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/737.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/number_theory/864.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,B
s1.1-7B,test/number_theory/627.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,1.0,B
s1.1-7B,test/number_theory/45.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
s1.1-7B,test/number_theory/1055.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/46.json,Number Theory,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/516.json,Number Theory,general,Tie,0.5,0.5,A,0.75,A,0.85,A,0.7,A
s1.1-7B,test/number_theory/357.json,Number Theory,general,A,0.0,0.85,B,1.0,B,0.9,B,0.95,A
s1.1-7B,test/number_theory/914.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/number_theory/847.json,Number Theory,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/753.json,Number Theory,general,A,1.0,0.95,A,0.8,A,0.9,A,0.95,A
s1.1-7B,test/number_theory/1257.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,B
s1.1-7B,test/number_theory/156.json,Number Theory,general,A,0.0,0.9,B,0.9,B,0.95,B,0.95,B
s1.1-7B,test/number_theory/612.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.95,B,0.9,B
s1.1-7B,test/number_theory/931.json,Number Theory,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,A
s1.1-7B,test/number_theory/521.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/number_theory/598.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,B,0.85,A
s1.1-7B,test/number_theory/978.json,Number Theory,general,A,1.0,0.88,A,0.95,A,0.95,A,1.0,B
s1.1-7B,test/number_theory/838.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
s1.1-7B,test/number_theory/149.json,Number Theory,general,A,1.0,0.9,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/number_theory/1201.json,Number Theory,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/234.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.9,A,0.95,A
s1.1-7B,test/number_theory/417.json,Number Theory,general,B,1.0,0.85,B,0.75,B,0.95,B,0.95,A
s1.1-7B,test/number_theory/89.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
s1.1-7B,test/number_theory/183.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.9,B,0.8,A
s1.1-7B,test/number_theory/1065.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/number_theory/466.json,Number Theory,general,B,1.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/number_theory/634.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
s1.1-7B,test/number_theory/533.json,Number Theory,general,A,1.0,0.95,A,0.98,A,0.95,A,0.95,B
s1.1-7B,test/number_theory/691.json,Number Theory,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/1287.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/number_theory/631.json,Number Theory,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/number_theory/488.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/number_theory/1172.json,Number Theory,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/number_theory/203.json,Number Theory,general,A,0.0,0.95,B,0.6,B,0.95,A,0.95,B
s1.1-7B,test/number_theory/911.json,Number Theory,general,B,1.0,0.95,B,0.85,A,0.95,B,0.95,A
s1.1-7B,test/number_theory/483.json,Number Theory,general,B,0.0,0.95,A,1.0,A,0.95,A,1.0,B
s1.1-7B,test/number_theory/368.json,Number Theory,general,A,1.0,0.95,B,0.65,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/686.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/820.json,Number Theory,general,A,0.0,0.85,B,0.85,B,0.9,B,0.7,A
s1.1-7B,test/number_theory/109.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/427.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/1185.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/928.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
s1.1-7B,test/number_theory/132.json,Number Theory,general,A,1.0,0.9,B,0.85,A,0.98,A,0.95,B
s1.1-7B,test/number_theory/769.json,Number Theory,general,B,1.0,0.95,B,1.0,B,0.98,B,1.0,A
s1.1-7B,test/number_theory/1002.json,Number Theory,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/410.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/number_theory/255.json,Number Theory,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
s1.1-7B,test/number_theory/1000.json,Number Theory,general,A,0.0,0.85,B,0.9,B,0.95,B,0.95,B
s1.1-7B,test/number_theory/13.json,Number Theory,general,A,1.0,0.9,A,0.95,B,0.95,A,0.95,B
s1.1-7B,test/number_theory/459.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/number_theory/342.json,Number Theory,general,B,1.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/number_theory/679.json,Number Theory,general,A,1.0,0.95,A,1.0,A,0.98,A,0.95,A
s1.1-7B,test/number_theory/72.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,B
s1.1-7B,test/number_theory/22.json,Number Theory,general,B,0.0,0.85,A,0.85,B,0.85,A,0.9,B
s1.1-7B,test/number_theory/1128.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/1090.json,Number Theory,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/number_theory/239.json,Number Theory,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,B
s1.1-7B,test/prealgebra/1622.json,Prealgebra,general,B,1.0,0.9,B,0.6,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/1139.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.65,B,0.7,A
s1.1-7B,test/prealgebra/1840.json,Prealgebra,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1302.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.92,A,0.95,A
s1.1-7B,test/prealgebra/930.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,1.0,A
s1.1-7B,test/prealgebra/1558.json,Prealgebra,general,A,0.0,0.95,B,0.75,A,0.95,B,0.95,B
s1.1-7B,test/prealgebra/1388.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/951.json,Prealgebra,general,A,1.0,0.85,A,0.85,A,0.98,A,0.95,A
s1.1-7B,test/prealgebra/572.json,Prealgebra,general,A,0.0,0.95,B,0.9,B,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1247.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.98,B,1.0,B
s1.1-7B,test/prealgebra/1747.json,Prealgebra,general,B,0.0,1.0,A,0.95,A,0.95,A,0.9,A
s1.1-7B,test/prealgebra/1233.json,Prealgebra,general,A,1.0,0.95,B,0.95,A,0.95,A,1.0,A
s1.1-7B,test/prealgebra/192.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
s1.1-7B,test/prealgebra/307.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,1.0,A
s1.1-7B,test/prealgebra/1761.json,Prealgebra,general,A,0.0,0.9,B,0.85,B,0.9,B,0.95,B
s1.1-7B,test/prealgebra/1646.json,Prealgebra,general,A,1.0,0.85,A,0.6,B,0.95,A,0.6,A
s1.1-7B,test/prealgebra/105.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/prealgebra/1924.json,Prealgebra,general,A,0.0,0.85,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/prealgebra/1804.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
s1.1-7B,test/prealgebra/1733.json,Prealgebra,general,A,0.0,0.95,B,0.65,B,0.85,A,0.8,A
s1.1-7B,test/prealgebra/505.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1686.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.98,B,0.95,A
s1.1-7B,test/prealgebra/1807.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.9,B
s1.1-7B,test/prealgebra/1297.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/prealgebra/1655.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/prealgebra/1356.json,Prealgebra,general,A,1.0,0.85,A,1.0,A,0.95,A,1.0,B
s1.1-7B,test/prealgebra/1003.json,Prealgebra,general,A,0.0,0.95,A,0.75,B,0.95,B,0.9,B
s1.1-7B,test/prealgebra/1272.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.98,A,0.95,A
s1.1-7B,test/prealgebra/1113.json,Prealgebra,general,A,0.0,0.95,B,0.85,A,0.98,B,0.95,B
s1.1-7B,test/prealgebra/1908.json,Prealgebra,general,A,1.0,0.95,A,1.0,A,0.95,A,1.0,A
s1.1-7B,test/prealgebra/1922.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/prealgebra/1907.json,Prealgebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/2086.json,Prealgebra,general,B,1.0,0.85,B,0.95,B,0.98,B,1.0,A
s1.1-7B,test/prealgebra/378.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/prealgebra/1555.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1436.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/prealgebra/1961.json,Prealgebra,general,B,0.0,0.95,B,0.92,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/2057.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
s1.1-7B,test/prealgebra/153.json,Prealgebra,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/874.json,Prealgebra,general,B,1.0,0.95,B,0.85,B,0.6,B,0.65,A
s1.1-7B,test/prealgebra/1251.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1458.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/1995.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.92,A,0.95,A
s1.1-7B,test/prealgebra/1317.json,Prealgebra,general,A,1.0,0.85,A,0.98,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1742.json,Prealgebra,general,B,1.0,0.85,B,0.85,A,0.95,B,0.95,A
s1.1-7B,test/prealgebra/993.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1834.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/1512.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/260.json,Prealgebra,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/1787.json,Prealgebra,general,A,1.0,0.95,A,0.9,A,0.95,A,1.0,B
s1.1-7B,test/prealgebra/1044.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,B
s1.1-7B,test/prealgebra/465.json,Prealgebra,general,A,1.0,0.95,A,0.98,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1423.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,A
s1.1-7B,test/prealgebra/954.json,Prealgebra,general,A,0.0,0.95,B,0.9,A,0.95,B,0.95,A
s1.1-7B,test/prealgebra/1973.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/1730.json,Prealgebra,general,B,0.0,0.85,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1238.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1353.json,Prealgebra,general,A,1.0,0.85,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1187.json,Prealgebra,general,A,1.0,0.95,B,0.65,A,0.95,,,A
s1.1-7B,test/prealgebra/1743.json,Prealgebra,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1865.json,Prealgebra,general,A,1.0,1.0,A,0.9,B,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1298.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
s1.1-7B,test/prealgebra/2066.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/631.json,Prealgebra,general,B,1.0,0.7,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/977.json,Prealgebra,general,A,1.0,0.95,B,0.95,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1991.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,B
s1.1-7B,test/prealgebra/1784.json,Prealgebra,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/prealgebra/1572.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/prealgebra/65.json,Prealgebra,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1227.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,B,0.95,B
s1.1-7B,test/prealgebra/2019.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/prealgebra/1640.json,Prealgebra,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/prealgebra/2037.json,Prealgebra,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/prealgebra/996.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/805.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/914.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.75,A,0.9,A
s1.1-7B,test/prealgebra/1114.json,Prealgebra,general,B,0.0,0.85,A,1.0,A,0.95,A,1.0,A
s1.1-7B,test/prealgebra/846.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/prealgebra/1930.json,Prealgebra,general,B,1.0,0.85,B,0.98,B,0.95,B,1.0,B
s1.1-7B,test/prealgebra/1252.json,Prealgebra,general,B,0.0,0.95,A,0.85,A,0.95,A,0.9,B
s1.1-7B,test/prealgebra/1203.json,Prealgebra,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/prealgebra/1128.json,Prealgebra,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/geometry/248.json,Geometry,general,A,1.0,0.98,B,0.6,A,0.98,A,0.9,A
s1.1-7B,test/geometry/434.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/geometry/967.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,A
s1.1-7B,test/geometry/627.json,Geometry,general,B,0.0,0.95,A,0.9,A,0.95,A,0.95,B
s1.1-7B,test/geometry/178.json,Geometry,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/geometry/456.json,Geometry,general,B,1.0,0.9,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/geometry/353.json,Geometry,general,A,1.0,0.95,B,0.95,A,0.92,A,0.95,A
s1.1-7B,test/geometry/183.json,Geometry,general,A,0.0,0.6,B,0.7,B,0.9,B,0.9,A
s1.1-7B,test/geometry/283.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,B,0.95,A
s1.1-7B,test/geometry/1140.json,Geometry,general,B,1.0,0.85,B,0.85,B,0.95,B,0.95,A
s1.1-7B,test/geometry/172.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,,,B
s1.1-7B,test/geometry/880.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.9,,,A
s1.1-7B,test/geometry/802.json,Geometry,general,A,1.0,0.9,A,0.98,A,0.95,A,1.0,B
s1.1-7B,test/geometry/65.json,Geometry,general,B,1.0,0.85,B,0.65,B,0.75,B,0.95,B
s1.1-7B,test/geometry/702.json,Geometry,general,B,1.0,0.85,B,0.98,B,0.95,B,1.0,B
s1.1-7B,test/geometry/221.json,Geometry,general,B,0.0,0.95,A,0.9,A,0.92,A,0.95,B
s1.1-7B,test/geometry/547.json,Geometry,general,A,0.0,0.9,B,0.85,B,0.95,B,0.95,B
s1.1-7B,test/geometry/229.json,Geometry,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,A
s1.1-7B,test/geometry/254.json,Geometry,general,A,1.0,0.95,B,0.7,A,0.95,A,0.95,A
s1.1-7B,test/geometry/473.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,A
s1.1-7B,test/geometry/347.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.95,B,1.0,B
s1.1-7B,test/geometry/483.json,Geometry,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/geometry/826.json,Geometry,general,B,1.0,0.9,B,0.65,B,0.95,B,0.95,B
s1.1-7B,test/geometry/226.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/geometry/686.json,Geometry,general,B,0.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/geometry/1097.json,Geometry,general,A,0.0,0.85,B,0.95,B,0.95,B,0.95,B
s1.1-7B,test/geometry/965.json,Geometry,general,A,0.0,0.75,B,0.85,B,0.9,B,0.9,A
s1.1-7B,test/geometry/711.json,Geometry,general,A,0.0,0.95,B,0.85,B,0.85,B,0.95,A
s1.1-7B,test/geometry/1108.json,Geometry,general,A,0.0,0.95,B,0.9,B,0.95,A,1.0,B
s1.1-7B,test/geometry/947.json,Geometry,general,B,1.0,0.35,B,0.8,B,0.75,B,0.65,B
s1.1-7B,test/geometry/465.json,Geometry,general,A,0.0,1.0,A,0.7,B,0.75,B,0.7,B
s1.1-7B,test/geometry/73.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/geometry/106.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,B
s1.1-7B,test/geometry/846.json,Geometry,general,B,1.0,0.85,B,0.75,B,0.7,B,0.7,B
s1.1-7B,test/geometry/538.json,Geometry,general,A,0.0,0.95,B,0.95,A,0.95,B,0.95,A
s1.1-7B,test/geometry/795.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,B
s1.1-7B,test/geometry/817.json,Geometry,general,B,1.0,0.95,B,0.95,B,0.95,B,0.65,A
s1.1-7B,test/geometry/843.json,Geometry,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,A
s1.1-7B,test/geometry/477.json,Geometry,general,A,0.0,0.95,B,0.95,B,0.85,B,0.95,A
s1.1-7B,test/geometry/561.json,Geometry,general,B,1.0,0.85,A,0.55,B,0.9,B,0.75,A
s1.1-7B,test/geometry/615.json,Geometry,general,A,1.0,0.95,A,0.95,A,0.85,A,0.95,B
s1.1-7B,test/counting_and_probability/525.json,Counting & Probability,general,B,1.0,0.85,B,0.95,B,0.85,,,B
s1.1-7B,test/counting_and_probability/666.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/134.json,Counting & Probability,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/119.json,Counting & Probability,general,A,1.0,0.95,B,0.92,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/1114.json,Counting & Probability,general,A,0.0,0.95,B,0.95,B,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/377.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.98,A,0.95,A
s1.1-7B,test/counting_and_probability/23957.json,Counting & Probability,general,A,1.0,0.95,A,0.95,B,0.92,A,0.95,A
s1.1-7B,test/counting_and_probability/1060.json,Counting & Probability,general,B,1.0,0.95,B,0.95,B,0.95,B,0.95,A
s1.1-7B,test/counting_and_probability/430.json,Counting & Probability,general,A,0.0,0.95,B,0.7,B,0.85,,,B
s1.1-7B,test/counting_and_probability/159.json,Counting & Probability,general,B,0.0,0.95,A,1.0,A,0.95,A,1.0,A
s1.1-7B,test/counting_and_probability/230.json,Counting & Probability,general,B,1.0,0.95,B,0.9,B,0.95,A,0.95,B
s1.1-7B,test/counting_and_probability/803.json,Counting & Probability,general,B,0.0,0.95,B,0.9,A,0.95,A,0.95,B
s1.1-7B,test/counting_and_probability/181.json,Counting & Probability,general,B,1.0,0.85,B,0.7,B,0.7,B,0.7,A
s1.1-7B,test/counting_and_probability/51.json,Counting & Probability,general,A,1.0,0.95,A,0.85,B,0.95,A,0.95,B
s1.1-7B,test/counting_and_probability/508.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/389.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/765.json,Counting & Probability,general,A,0.0,1.0,B,0.85,B,0.95,B,0.95,B
s1.1-7B,test/counting_and_probability/282.json,Counting & Probability,general,B,0.0,0.75,A,0.65,B,0.65,A,0.9,B
s1.1-7B,test/counting_and_probability/71.json,Counting & Probability,general,A,1.0,0.95,B,0.85,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/894.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/1009.json,Counting & Probability,general,B,0.0,0.95,B,0.85,A,0.95,A,0.95,B
s1.1-7B,test/counting_and_probability/913.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/25149.json,Counting & Probability,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/339.json,Counting & Probability,general,A,1.0,0.95,A,0.9,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/870.json,Counting & Probability,general,B,1.0,0.9,A,0.95,B,0.9,B,0.9,A
s1.1-7B,test/counting_and_probability/216.json,Counting & Probability,general,B,0.0,0.95,A,0.9,A,0.98,A,1.0,B
s1.1-7B,test/counting_and_probability/737.json,Counting & Probability,general,A,0.0,0.95,B,0.85,B,0.95,A,0.95,B
s1.1-7B,test/counting_and_probability/116.json,Counting & Probability,general,A,1.0,0.95,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/238.json,Counting & Probability,general,A,0.0,0.95,B,0.75,B,0.9,A,0.7,B
s1.1-7B,test/counting_and_probability/1014.json,Counting & Probability,general,A,0.0,0.95,A,0.95,B,0.95,B,0.95,A
s1.1-7B,test/counting_and_probability/14.json,Counting & Probability,general,A,0.0,0.98,B,0.85,B,0.95,B,0.95,B
s1.1-7B,test/counting_and_probability/188.json,Counting & Probability,general,B,1.0,0.85,B,0.85,B,0.75,A,0.9,B
s1.1-7B,test/counting_and_probability/761.json,Counting & Probability,general,A,0.0,0.95,B,0.85,A,0.95,B,0.95,B
s1.1-7B,test/counting_and_probability/10.json,Counting & Probability,general,A,1.0,0.95,A,0.85,A,0.95,A,0.95,B
s1.1-7B,test/counting_and_probability/731.json,Counting & Probability,general,A,0.0,0.8,B,0.75,B,0.85,A,0.7,A
s1.1-7B,test/counting_and_probability/190.json,Counting & Probability,general,A,1.0,0.95,B,0.9,A,0.95,A,0.95,B
s1.1-7B,test/counting_and_probability/1003.json,Counting & Probability,general,A,1.0,1.0,A,0.95,A,0.95,A,0.95,A
s1.1-7B,test/counting_and_probability/199.json,Counting & Probability,general,A,1.0,0.95,A,1.0,A,0.95,A,0.95,B
