,solver,solver_id,problem,competition,split,judge,accuracy
4854,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_1,allrussian,proofs,OPC R1 8B,0.0
4855,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_10,allrussian,proofs,OPC R1 8B,0.0
4856,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_11,allrussian,proofs,OPC R1 8B,1.0
4857,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_12,allrussian,proofs,OPC R1 8B,0.0
4858,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_13,allrussian,proofs,OPC R1 8B,1.0
4859,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_14,allrussian,proofs,OPC R1 8B,0.0
4860,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_15,allrussian,proofs,OPC R1 8B,0.0
4861,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_16,allrussian,proofs,OPC R1 8B,0.0
4862,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_2,allrussian,proofs,OPC R1 8B,0.0
4863,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_3,allrussian,proofs,OPC R1 8B,0.0
4864,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_4,allrussian,proofs,OPC R1 8B,0.0
4865,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_5,allrussian,proofs,OPC R1 8B,1.0
4866,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_6,allrussian,proofs,OPC R1 8B,0.0
4867,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_7,allrussian,proofs,OPC R1 8B,1.0
4868,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_8,allrussian,proofs,OPC R1 8B,0.0
4869,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,allrussian_2025_9,allrussian,proofs,OPC R1 8B,1.0
4870,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmo_2025_1,bmo,proofs,OPC R1 8B,0.0
4871,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmo_2025_2,bmo,proofs,OPC R1 8B,0.0
4872,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmo_2025_3,bmo,proofs,OPC R1 8B,0.0
4873,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmo_2025_4,bmo,proofs,OPC R1 8B,1.0
4874,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_A_2025_1,bmosl,proofs,OPC R1 8B,0.0
4875,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_A_2025_2,bmosl,proofs,OPC R1 8B,0.0
4876,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_A_2025_3,bmosl,proofs,OPC R1 8B,0.0
4877,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_A_2025_4,bmosl,proofs,OPC R1 8B,0.0
4878,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_A_2025_5,bmosl,proofs,OPC R1 8B,0.0
4879,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_A_2025_6,bmosl,proofs,OPC R1 8B,0.0
4880,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_C_2025_1,bmosl,proofs,OPC R1 8B,1.0
4881,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_C_2025_2,bmosl,proofs,OPC R1 8B,0.0
4882,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_C_2025_3,bmosl,proofs,OPC R1 8B,0.0
4883,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_C_2025_4,bmosl,proofs,OPC R1 8B,0.0
4884,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_C_2025_5,bmosl,proofs,OPC R1 8B,0.0
4885,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_C_2025_6,bmosl,proofs,OPC R1 8B,0.0
4886,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_G_2025_1,bmosl,proofs,OPC R1 8B,0.0
4887,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_G_2025_2,bmosl,proofs,OPC R1 8B,0.0
4888,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_G_2025_3,bmosl,proofs,OPC R1 8B,0.0
4889,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_G_2025_4,bmosl,proofs,OPC R1 8B,0.0
4890,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_G_2025_5,bmosl,proofs,OPC R1 8B,0.0
4891,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_G_2025_6,bmosl,proofs,OPC R1 8B,0.0
4892,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_G_2025_7,bmosl,proofs,OPC R1 8B,0.0
4893,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_NT_2025_1,bmosl,proofs,OPC R1 8B,0.0
4894,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_NT_2025_2,bmosl,proofs,OPC R1 8B,0.0
4895,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_NT_2025_3,bmosl,proofs,OPC R1 8B,0.0
4896,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_NT_2025_4,bmosl,proofs,OPC R1 8B,0.0
4897,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_NT_2025_5,bmosl,proofs,OPC R1 8B,0.0
4898,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_NT_2025_6,bmosl,proofs,OPC R1 8B,0.0
4899,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bmosl_NT_2025_7,bmosl,proofs,OPC R1 8B,0.0
4900,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bulgaria_2025_1,bulgaria,proofs,OPC R1 8B,1.0
4901,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bulgaria_2025_2,bulgaria,proofs,OPC R1 8B,0.0
4902,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bulgaria_2025_3,bulgaria,proofs,OPC R1 8B,0.0
4903,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bulgaria_2025_4,bulgaria,proofs,OPC R1 8B,0.0
4904,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bulgaria_2025_5,bulgaria,proofs,OPC R1 8B,0.0
4905,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,bulgaria_2025_6,bulgaria,proofs,OPC R1 8B,0.0
4906,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,canada_2025_1,canada,proofs,OPC R1 8B,0.0
4907,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,canada_2025_2,canada,proofs,OPC R1 8B,1.0
4908,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,canada_2025_3,canada,proofs,OPC R1 8B,1.0
4909,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,canada_2025_4,canada,proofs,OPC R1 8B,0.0
4910,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,canada_2025_5,canada,proofs,OPC R1 8B,0.0
4911,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,china_2025_1,china,proofs,OPC R1 8B,0.0
4912,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,china_2025_2,china,proofs,OPC R1 8B,0.0
4913,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,china_2025_3,china,proofs,OPC R1 8B,0.0
4914,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,china_2025_5,china,proofs,OPC R1 8B,0.0
4915,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,china_2025_6,china,proofs,OPC R1 8B,0.0
4916,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_1,chinatst,proofs,OPC R1 8B,0.0
4917,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_10,chinatst,proofs,OPC R1 8B,0.0
4918,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_11,chinatst,proofs,OPC R1 8B,0.0
4919,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_12,chinatst,proofs,OPC R1 8B,0.0
4920,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_13,chinatst,proofs,OPC R1 8B,0.0
4921,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_14,chinatst,proofs,OPC R1 8B,0.0
4922,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_15,chinatst,proofs,OPC R1 8B,0.0
4923,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_16,chinatst,proofs,OPC R1 8B,0.0
4924,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_17,chinatst,proofs,OPC R1 8B,1.0
4925,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_18,chinatst,proofs,OPC R1 8B,0.0
4926,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_19,chinatst,proofs,OPC R1 8B,0.0
4927,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_2,chinatst,proofs,OPC R1 8B,0.0
4928,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_20,chinatst,proofs,OPC R1 8B,0.0
4929,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_21,chinatst,proofs,OPC R1 8B,0.0
4930,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_22,chinatst,proofs,OPC R1 8B,0.0
4931,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_3,chinatst,proofs,OPC R1 8B,1.0
4932,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_4,chinatst,proofs,OPC R1 8B,0.0
4933,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_5,chinatst,proofs,OPC R1 8B,0.0
4934,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_6,chinatst,proofs,OPC R1 8B,1.0
4935,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_7,chinatst,proofs,OPC R1 8B,0.0
4936,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_8,chinatst,proofs,OPC R1 8B,0.0
4937,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,chinatst_2025_9,chinatst,proofs,OPC R1 8B,0.0
4938,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,egmo_2025_1,egmo,proofs,OPC R1 8B,1.0
4939,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,egmo_2025_2,egmo,proofs,OPC R1 8B,0.0
4940,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,egmo_2025_3,egmo,proofs,OPC R1 8B,0.0
4941,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,egmo_2025_4,egmo,proofs,OPC R1 8B,0.0
4942,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,egmo_2025_5,egmo,proofs,OPC R1 8B,0.0
4943,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,egmo_2025_6,egmo,proofs,OPC R1 8B,1.0
4944,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_A_2025_1,elmosl,proofs,OPC R1 8B,0.0
4945,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_A_2025_2,elmosl,proofs,OPC R1 8B,0.0
4946,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_A_2025_3,elmosl,proofs,OPC R1 8B,0.0
4947,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_A_2025_5,elmosl,proofs,OPC R1 8B,0.0
4948,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_A_2025_6,elmosl,proofs,OPC R1 8B,1.0
4949,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_A_2025_7,elmosl,proofs,OPC R1 8B,0.0
4950,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_A_2025_8,elmosl,proofs,OPC R1 8B,0.0
4951,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_C_2025_1,elmosl,proofs,OPC R1 8B,0.0
4952,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_C_2025_2,elmosl,proofs,OPC R1 8B,1.0
4953,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_C_2025_3,elmosl,proofs,OPC R1 8B,0.0
4954,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_C_2025_4,elmosl,proofs,OPC R1 8B,0.0
4955,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_C_2025_5,elmosl,proofs,OPC R1 8B,1.0
4956,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_C_2025_6,elmosl,proofs,OPC R1 8B,0.0
4957,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_C_2025_7,elmosl,proofs,OPC R1 8B,0.0
4958,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_C_2025_8,elmosl,proofs,OPC R1 8B,0.0
4959,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_C_2025_9,elmosl,proofs,OPC R1 8B,0.0
4960,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_G_2025_1,elmosl,proofs,OPC R1 8B,0.0
4961,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_G_2025_2,elmosl,proofs,OPC R1 8B,0.0
4962,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_G_2025_3,elmosl,proofs,OPC R1 8B,0.0
4963,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_G_2025_4,elmosl,proofs,OPC R1 8B,0.0
4964,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_G_2025_5,elmosl,proofs,OPC R1 8B,0.0
4965,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_G_2025_6,elmosl,proofs,OPC R1 8B,0.0
4966,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_G_2025_7,elmosl,proofs,OPC R1 8B,0.0
4967,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_G_2025_8,elmosl,proofs,OPC R1 8B,0.0
4968,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_G_2025_9,elmosl,proofs,OPC R1 8B,0.0
4969,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_NT_2025_1,elmosl,proofs,OPC R1 8B,0.0
4970,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_NT_2025_2,elmosl,proofs,OPC R1 8B,0.0
4971,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_NT_2025_3,elmosl,proofs,OPC R1 8B,0.0
4972,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_NT_2025_4,elmosl,proofs,OPC R1 8B,0.0
4973,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_NT_2025_5,elmosl,proofs,OPC R1 8B,0.0
4974,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_NT_2025_6,elmosl,proofs,OPC R1 8B,1.0
4975,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,elmosl_NT_2025_7,elmosl,proofs,OPC R1 8B,0.0
4976,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,german_2025_1,german,proofs,OPC R1 8B,1.0
4977,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,german_2025_2,german,proofs,OPC R1 8B,0.0
4978,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,german_2025_3,german,proofs,OPC R1 8B,0.0
4979,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,german_2025_4,german,proofs,OPC R1 8B,0.0
4980,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,greece_2025_1,greece,proofs,OPC R1 8B,0.0
4981,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,greece_2025_2,greece,proofs,OPC R1 8B,0.0
4982,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,greece_2025_3,greece,proofs,OPC R1 8B,0.0
4983,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,greece_2025_4,greece,proofs,OPC R1 8B,0.0
4984,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_1,imosl,proofs,OPC R1 8B,0.0
4985,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_10,imosl,proofs,OPC R1 8B,0.0
4986,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_11,imosl,proofs,OPC R1 8B,0.0
4987,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_12,imosl,proofs,OPC R1 8B,1.0
4988,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_13,imosl,proofs,OPC R1 8B,0.0
4989,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_14,imosl,proofs,OPC R1 8B,0.0
4990,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_15,imosl,proofs,OPC R1 8B,0.0
4991,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_16,imosl,proofs,OPC R1 8B,0.0
4992,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_17,imosl,proofs,OPC R1 8B,0.0
4993,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_18,imosl,proofs,OPC R1 8B,0.0
4994,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_19,imosl,proofs,OPC R1 8B,0.0
4995,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_2,imosl,proofs,OPC R1 8B,0.0
4996,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_20,imosl,proofs,OPC R1 8B,1.0
4997,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_21,imosl,proofs,OPC R1 8B,0.0
4998,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_22,imosl,proofs,OPC R1 8B,0.0
4999,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_23,imosl,proofs,OPC R1 8B,0.0
5000,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_24,imosl,proofs,OPC R1 8B,0.0
5001,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_25,imosl,proofs,OPC R1 8B,0.0
5002,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_26,imosl,proofs,OPC R1 8B,0.0
5003,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_27,imosl,proofs,OPC R1 8B,0.0
5004,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_28,imosl,proofs,OPC R1 8B,0.0
5005,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_29,imosl,proofs,OPC R1 8B,0.0
5006,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_3,imosl,proofs,OPC R1 8B,0.0
5007,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_30,imosl,proofs,OPC R1 8B,0.0
5008,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_31,imosl,proofs,OPC R1 8B,0.0
5009,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_32,imosl,proofs,OPC R1 8B,1.0
5010,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_33,imosl,proofs,OPC R1 8B,0.0
5011,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_34,imosl,proofs,OPC R1 8B,0.0
5012,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_35,imosl,proofs,OPC R1 8B,0.0
5013,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_36,imosl,proofs,OPC R1 8B,0.0
5014,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_37,imosl,proofs,OPC R1 8B,0.0
5015,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_4,imosl,proofs,OPC R1 8B,0.0
5016,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_5,imosl,proofs,OPC R1 8B,0.0
5017,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_6,imosl,proofs,OPC R1 8B,0.0
5018,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_7,imosl,proofs,OPC R1 8B,1.0
5019,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_8,imosl,proofs,OPC R1 8B,0.0
5020,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,imosl_2025_9,imosl,proofs,OPC R1 8B,0.0
5021,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_2025_1,india,proofs,OPC R1 8B,1.0
5022,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_2025_2,india,proofs,OPC R1 8B,0.0
5023,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_2025_3,india,proofs,OPC R1 8B,0.0
5024,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_2025_4,india,proofs,OPC R1 8B,0.0
5025,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_2025_5,india,proofs,OPC R1 8B,0.0
5026,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_2025_6,india,proofs,OPC R1 8B,0.0
5027,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_1,india,proofs,OPC R1 8B,0.0
5028,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_10,india,proofs,OPC R1 8B,0.0
5029,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_11,india,proofs,OPC R1 8B,0.0
5030,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_12,india,proofs,OPC R1 8B,0.0
5031,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_13,india,proofs,OPC R1 8B,1.0
5032,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_14,india,proofs,OPC R1 8B,1.0
5033,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_15,india,proofs,OPC R1 8B,0.0
5034,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_16,india,proofs,OPC R1 8B,0.0
5035,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_17,india,proofs,OPC R1 8B,0.0
5036,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_18,india,proofs,OPC R1 8B,0.0
5037,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_19,india,proofs,OPC R1 8B,0.0
5038,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_2,india,proofs,OPC R1 8B,0.0
5039,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_20,india,proofs,OPC R1 8B,0.0
5040,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_21,india,proofs,OPC R1 8B,0.0
5041,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_3,india,proofs,OPC R1 8B,0.0
5042,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_4,india,proofs,OPC R1 8B,0.0
5043,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_5,india,proofs,OPC R1 8B,0.0
5044,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_6,india,proofs,OPC R1 8B,0.0
5045,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_7,india,proofs,OPC R1 8B,0.0
5046,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_8,india,proofs,OPC R1 8B,0.0
5047,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,india_prep_2025_9,india,proofs,OPC R1 8B,1.0
5048,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,iran_tst_2025_1,iran,proofs,OPC R1 8B,0.0
5049,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,iran_tst_2025_2,iran,proofs,OPC R1 8B,0.0
5050,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,iran_tst_2025_3,iran,proofs,OPC R1 8B,0.0
5051,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,iran_tst_2025_4,iran,proofs,OPC R1 8B,1.0
5052,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,iran_tst_2025_5,iran,proofs,OPC R1 8B,0.0
5053,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,iran_tst_2025_6,iran,proofs,OPC R1 8B,0.0
5054,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,iran_tst_2025_7,iran,proofs,OPC R1 8B,0.0
5055,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,iran_tst_2025_8,iran,proofs,OPC R1 8B,0.0
5056,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,iran_tst_2025_9,iran,proofs,OPC R1 8B,0.0
5057,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_1,israel,proofs,OPC R1 8B,0.0
5058,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_10,israel,proofs,OPC R1 8B,0.0
5059,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_2,israel,proofs,OPC R1 8B,0.0
5060,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_3,israel,proofs,OPC R1 8B,0.0
5061,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_4,israel,proofs,OPC R1 8B,0.0
5062,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_5,israel,proofs,OPC R1 8B,0.0
5063,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_6,israel,proofs,OPC R1 8B,0.0
5064,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_7,israel,proofs,OPC R1 8B,0.0
5065,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_8,israel,proofs,OPC R1 8B,0.0
5066,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,israel_tst_2025_9,israel,proofs,OPC R1 8B,0.0
5067,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,izho_2025_1,izho,proofs,OPC R1 8B,1.0
5068,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,izho_2025_2,izho,proofs,OPC R1 8B,0.0
5069,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,izho_2025_3,izho,proofs,OPC R1 8B,0.0
5070,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,izho_2025_4,izho,proofs,OPC R1 8B,0.0
5071,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,izho_2025_5,izho,proofs,OPC R1 8B,0.0
5072,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,izho_2025_6,izho,proofs,OPC R1 8B,0.0
5073,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,jbmo_2025_1,jbmo,proofs,OPC R1 8B,1.0
5074,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,jbmo_2025_2,jbmo,proofs,OPC R1 8B,1.0
5075,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,jbmo_2025_3,jbmo,proofs,OPC R1 8B,0.0
5076,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,jbmo_2025_4,jbmo,proofs,OPC R1 8B,0.0
5077,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,korea_2025_1,korea,proofs,OPC R1 8B,0.0
5078,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,korea_2025_2,korea,proofs,OPC R1 8B,0.0
5079,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,korea_2025_3,korea,proofs,OPC R1 8B,0.0
5080,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,korea_2025_4,korea,proofs,OPC R1 8B,0.0
5081,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,korea_2025_5,korea,proofs,OPC R1 8B,0.0
5082,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,korea_2025_6,korea,proofs,OPC R1 8B,0.0
5083,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_1,matharena,answer,Final Answer Parser,1.0
5084,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_10,matharena,answer,Final Answer Parser,0.0
5085,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_11,matharena,answer,Final Answer Parser,1.0
5086,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_12,matharena,answer,Final Answer Parser,1.0
5087,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_13,matharena,answer,Final Answer Parser,0.0
5088,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_14,matharena,answer,Final Answer Parser,0.0
5089,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_15,matharena,answer,Final Answer Parser,0.0
5090,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_16,matharena,answer,Final Answer Parser,1.0
5091,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_17,matharena,answer,Final Answer Parser,1.0
5092,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_18,matharena,answer,Final Answer Parser,1.0
5093,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_19,matharena,answer,Final Answer Parser,1.0
5094,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_2,matharena,answer,Final Answer Parser,1.0
5095,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_20,matharena,answer,Final Answer Parser,0.0
5096,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_21,matharena,answer,Final Answer Parser,1.0
5097,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_22,matharena,answer,Final Answer Parser,1.0
5098,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_23,matharena,answer,Final Answer Parser,1.0
5099,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_24,matharena,answer,Final Answer Parser,1.0
5100,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_25,matharena,answer,Final Answer Parser,1.0
5101,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_26,matharena,answer,Final Answer Parser,1.0
5102,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_27,matharena,answer,Final Answer Parser,1.0
5103,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_28,matharena,answer,Final Answer Parser,1.0
5104,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_29,matharena,answer,Final Answer Parser,1.0
5105,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_3,matharena,answer,Final Answer Parser,1.0
5106,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_30,matharena,answer,Final Answer Parser,0.0
5107,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_4,matharena,answer,Final Answer Parser,1.0
5108,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_5,matharena,answer,Final Answer Parser,1.0
5109,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_6,matharena,answer,Final Answer Parser,1.0
5110,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_7,matharena,answer,Final Answer Parser,0.0
5111,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_8,matharena,answer,Final Answer Parser,1.0
5112,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_aime/aime_2025_9,matharena,answer,Final Answer Parser,1.0
5113,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_1,matharena,answer,Final Answer Parser,1.0
5114,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_10,matharena,answer,Final Answer Parser,0.0
5115,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_11,matharena,answer,Final Answer Parser,1.0
5116,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_12,matharena,answer,Final Answer Parser,1.0
5117,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_13,matharena,answer,Final Answer Parser,0.0
5118,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_14,matharena,answer,Final Answer Parser,1.0
5119,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_15,matharena,answer,Final Answer Parser,1.0
5120,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_16,matharena,answer,Final Answer Parser,1.0
5121,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_17,matharena,answer,Final Answer Parser,1.0
5122,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_18,matharena,answer,Final Answer Parser,1.0
5123,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_19,matharena,answer,Final Answer Parser,1.0
5124,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_2,matharena,answer,Final Answer Parser,1.0
5125,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_20,matharena,answer,Final Answer Parser,1.0
5126,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_21,matharena,answer,Final Answer Parser,1.0
5127,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_22,matharena,answer,Final Answer Parser,0.0
5128,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_23,matharena,answer,Final Answer Parser,0.0
5129,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_24,matharena,answer,Final Answer Parser,1.0
5130,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_25,matharena,answer,Final Answer Parser,1.0
5131,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_26,matharena,answer,Final Answer Parser,1.0
5132,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_27,matharena,answer,Final Answer Parser,1.0
5133,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_28,matharena,answer,Final Answer Parser,1.0
5134,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_29,matharena,answer,Final Answer Parser,0.0
5135,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_3,matharena,answer,Final Answer Parser,0.0
5136,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_30,matharena,answer,Final Answer Parser,0.0
5137,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_4,matharena,answer,Final Answer Parser,1.0
5138,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_5,matharena,answer,Final Answer Parser,1.0
5139,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_6,matharena,answer,Final Answer Parser,1.0
5140,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_7,matharena,answer,Final Answer Parser,1.0
5141,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_8,matharena,answer,Final Answer Parser,1.0
5142,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_brumo/brumo_2025_9,matharena,answer,Final Answer Parser,1.0
5143,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_1,matharena,answer,Final Answer Parser,1.0
5144,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_10,matharena,answer,Final Answer Parser,1.0
5145,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_11,matharena,answer,Final Answer Parser,1.0
5146,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_12,matharena,answer,Final Answer Parser,1.0
5147,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_13,matharena,answer,Final Answer Parser,1.0
5148,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_14,matharena,answer,Final Answer Parser,0.0
5149,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_15,matharena,answer,Final Answer Parser,0.0
5150,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_16,matharena,answer,Final Answer Parser,0.0
5151,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_17,matharena,answer,Final Answer Parser,1.0
5152,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_18,matharena,answer,Final Answer Parser,0.0
5153,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_19,matharena,answer,Final Answer Parser,1.0
5154,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_2,matharena,answer,Final Answer Parser,1.0
5155,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_20,matharena,answer,Final Answer Parser,1.0
5156,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_21,matharena,answer,Final Answer Parser,0.0
5157,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_22,matharena,answer,Final Answer Parser,1.0
5158,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_23,matharena,answer,Final Answer Parser,0.0
5159,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_24,matharena,answer,Final Answer Parser,1.0
5160,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_25,matharena,answer,Final Answer Parser,1.0
5161,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_26,matharena,answer,Final Answer Parser,1.0
5162,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_27,matharena,answer,Final Answer Parser,1.0
5163,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_28,matharena,answer,Final Answer Parser,0.0
5164,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_29,matharena,answer,Final Answer Parser,1.0
5165,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_3,matharena,answer,Final Answer Parser,1.0
5166,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_30,matharena,answer,Final Answer Parser,1.0
5167,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_31,matharena,answer,Final Answer Parser,1.0
5168,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_32,matharena,answer,Final Answer Parser,0.0
5169,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_33,matharena,answer,Final Answer Parser,1.0
5170,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_34,matharena,answer,Final Answer Parser,0.0
5171,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_35,matharena,answer,Final Answer Parser,1.0
5172,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_36,matharena,answer,Final Answer Parser,1.0
5173,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_37,matharena,answer,Final Answer Parser,0.0
5174,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_38,matharena,answer,Final Answer Parser,0.0
5175,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_39,matharena,answer,Final Answer Parser,0.0
5176,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_4,matharena,answer,Final Answer Parser,1.0
5177,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_40,matharena,answer,Final Answer Parser,0.0
5178,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_5,matharena,answer,Final Answer Parser,1.0
5179,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_6,matharena,answer,Final Answer Parser,1.0
5180,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_7,matharena,answer,Final Answer Parser,0.0
5181,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_8,matharena,answer,Final Answer Parser,1.0
5182,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_cmimc/cmimc_2025_9,matharena,answer,Final Answer Parser,0.0
5183,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_1,matharena,answer,Final Answer Parser,1.0
5184,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_10,matharena,answer,Final Answer Parser,1.0
5185,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_11,matharena,answer,Final Answer Parser,0.0
5186,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_12,matharena,answer,Final Answer Parser,1.0
5187,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_13,matharena,answer,Final Answer Parser,0.0
5188,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_14,matharena,answer,Final Answer Parser,0.0
5189,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_15,matharena,answer,Final Answer Parser,0.0
5190,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_16,matharena,answer,Final Answer Parser,1.0
5191,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_17,matharena,answer,Final Answer Parser,0.0
5192,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_18,matharena,answer,Final Answer Parser,0.0
5193,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_19,matharena,answer,Final Answer Parser,0.0
5194,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_2,matharena,answer,Final Answer Parser,1.0
5195,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_20,matharena,answer,Final Answer Parser,0.0
5196,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_21,matharena,answer,Final Answer Parser,1.0
5197,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_22,matharena,answer,Final Answer Parser,1.0
5198,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_23,matharena,answer,Final Answer Parser,1.0
5199,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_24,matharena,answer,Final Answer Parser,1.0
5200,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_25,matharena,answer,Final Answer Parser,0.0
5201,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_26,matharena,answer,Final Answer Parser,1.0
5202,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_27,matharena,answer,Final Answer Parser,1.0
5203,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_28,matharena,answer,Final Answer Parser,1.0
5204,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_29,matharena,answer,Final Answer Parser,1.0
5205,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_3,matharena,answer,Final Answer Parser,0.0
5206,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_30,matharena,answer,Final Answer Parser,0.0
5207,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_4,matharena,answer,Final Answer Parser,1.0
5208,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_5,matharena,answer,Final Answer Parser,1.0
5209,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_6,matharena,answer,Final Answer Parser,0.0
5210,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_7,matharena,answer,Final Answer Parser,1.0
5211,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_8,matharena,answer,Final Answer Parser,1.0
5212,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_hmmt/hmmt_feb_2025_9,matharena,answer,Final Answer Parser,1.0
5213,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_1,matharena,answer,Final Answer Parser,1.0
5214,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_10,matharena,answer,Final Answer Parser,1.0
5215,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_11,matharena,answer,Final Answer Parser,1.0
5216,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_12,matharena,answer,Final Answer Parser,1.0
5217,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_13,matharena,answer,Final Answer Parser,1.0
5218,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_14,matharena,answer,Final Answer Parser,1.0
5219,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_15,matharena,answer,Final Answer Parser,1.0
5220,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_16,matharena,answer,Final Answer Parser,1.0
5221,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_17,matharena,answer,Final Answer Parser,1.0
5222,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_18,matharena,answer,Final Answer Parser,1.0
5223,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_19,matharena,answer,Final Answer Parser,1.0
5224,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_2,matharena,answer,Final Answer Parser,1.0
5225,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_20,matharena,answer,Final Answer Parser,0.0
5226,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_21,matharena,answer,Final Answer Parser,1.0
5227,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_22,matharena,answer,Final Answer Parser,0.0
5228,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_23,matharena,answer,Final Answer Parser,1.0
5229,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_24,matharena,answer,Final Answer Parser,1.0
5230,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_25,matharena,answer,Final Answer Parser,1.0
5231,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_26,matharena,answer,Final Answer Parser,1.0
5232,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_27,matharena,answer,Final Answer Parser,0.0
5233,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_28,matharena,answer,Final Answer Parser,1.0
5234,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_29,matharena,answer,Final Answer Parser,1.0
5235,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_3,matharena,answer,Final Answer Parser,1.0
5236,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_30,matharena,answer,Final Answer Parser,0.0
5237,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_31,matharena,answer,Final Answer Parser,1.0
5238,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_32,matharena,answer,Final Answer Parser,0.0
5239,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_33,matharena,answer,Final Answer Parser,1.0
5240,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_34,matharena,answer,Final Answer Parser,1.0
5241,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_35,matharena,answer,Final Answer Parser,1.0
5242,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_36,matharena,answer,Final Answer Parser,1.0
5243,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_37,matharena,answer,Final Answer Parser,1.0
5244,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_38,matharena,answer,Final Answer Parser,0.0
5245,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_39,matharena,answer,Final Answer Parser,1.0
5246,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_4,matharena,answer,Final Answer Parser,1.0
5247,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_40,matharena,answer,Final Answer Parser,1.0
5248,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_41,matharena,answer,Final Answer Parser,0.0
5249,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_42,matharena,answer,Final Answer Parser,0.0
5250,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_43,matharena,answer,Final Answer Parser,0.0
5251,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_44,matharena,answer,Final Answer Parser,1.0
5252,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_45,matharena,answer,Final Answer Parser,1.0
5253,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_46,matharena,answer,Final Answer Parser,1.0
5254,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_47,matharena,answer,Final Answer Parser,1.0
5255,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_48,matharena,answer,Final Answer Parser,1.0
5256,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_49,matharena,answer,Final Answer Parser,1.0
5257,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_5,matharena,answer,Final Answer Parser,1.0
5258,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_50,matharena,answer,Final Answer Parser,0.0
5259,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_51,matharena,answer,Final Answer Parser,0.0
5260,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_52,matharena,answer,Final Answer Parser,1.0
5261,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_53,matharena,answer,Final Answer Parser,0.0
5262,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_6,matharena,answer,Final Answer Parser,1.0
5263,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_7,matharena,answer,Final Answer Parser,1.0
5264,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_8,matharena,answer,Final Answer Parser,0.0
5265,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,matharena_smt/smt_2025_9,matharena,answer,Final Answer Parser,0.0
5266,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,nordic_2025_1,nordic,proofs,OPC R1 8B,1.0
5267,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,nordic_2025_2,nordic,proofs,OPC R1 8B,0.0
5268,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,nordic_2025_3,nordic,proofs,OPC R1 8B,1.0
5269,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,pan_african_2025_1,pan,proofs,OPC R1 8B,0.0
5270,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,pan_african_2025_2,pan,proofs,OPC R1 8B,1.0
5271,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,pan_african_2025_3,pan,proofs,OPC R1 8B,1.0
5272,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,pan_african_2025_4,pan,proofs,OPC R1 8B,1.0
5273,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,pan_african_2025_5,pan,proofs,OPC R1 8B,1.0
5274,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,pan_african_2025_6,pan,proofs,OPC R1 8B,0.0
5275,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,philippines_2025_1,philippines,proofs,OPC R1 8B,0.0
5276,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,philippines_2025_2,philippines,proofs,OPC R1 8B,0.0
5277,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,philippines_2025_3,philippines,proofs,OPC R1 8B,0.0
5278,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,philippines_2025_4,philippines,proofs,OPC R1 8B,0.0
5279,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,philippines_2025_5,philippines,proofs,OPC R1 8B,0.0
5280,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,philippines_2025_6,philippines,proofs,OPC R1 8B,0.0
5281,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,philippines_2025_7,philippines,proofs,OPC R1 8B,0.0
5282,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,philippines_2025_8,philippines,proofs,OPC R1 8B,0.0
5283,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,polish_2025_1,polish,proofs,OPC R1 8B,0.0
5284,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,polish_2025_2,polish,proofs,OPC R1 8B,1.0
5285,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,polish_2025_3,polish,proofs,OPC R1 8B,1.0
5286,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,polish_2025_4,polish,proofs,OPC R1 8B,0.0
5287,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,polish_2025_5,polish,proofs,OPC R1 8B,0.0
5288,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,polish_2025_6,polish,proofs,OPC R1 8B,0.0
5289,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,rmm_2025_1,rmm,proofs,OPC R1 8B,0.0
5290,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,rmm_2025_2,rmm,proofs,OPC R1 8B,1.0
5291,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,rmm_2025_3,rmm,proofs,OPC R1 8B,0.0
5292,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,rmm_2025_4,rmm,proofs,OPC R1 8B,0.0
5293,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,rmm_2025_5,rmm,proofs,OPC R1 8B,0.0
5294,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,rmm_2025_6,rmm,proofs,OPC R1 8B,0.0
5295,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_10_2025_1,romania,proofs,OPC R1 8B,0.0
5296,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_10_2025_2,romania,proofs,OPC R1 8B,0.0
5297,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_10_2025_3,romania,proofs,OPC R1 8B,1.0
5298,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_11_2025_1,romania,proofs,OPC R1 8B,0.0
5299,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_11_2025_2,romania,proofs,OPC R1 8B,0.0
5300,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_11_2025_3,romania,proofs,OPC R1 8B,0.0
5301,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_12_2025_1,romania,proofs,OPC R1 8B,0.0
5302,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_12_2025_2,romania,proofs,OPC R1 8B,1.0
5303,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_12_2025_3,romania,proofs,OPC R1 8B,0.0
5304,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_9_2025_1,romania,proofs,OPC R1 8B,0.0
5305,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_9_2025_2,romania,proofs,OPC R1 8B,0.0
5306,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_tst_2025_1,romania,proofs,OPC R1 8B,0.0
5307,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_tst_2025_2,romania,proofs,OPC R1 8B,0.0
5308,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,romania_tst_2025_3,romania,proofs,OPC R1 8B,0.0
5309,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,serbia_tst_bmo_2025_1,serbia,proofs,OPC R1 8B,0.0
5310,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,serbia_tst_bmo_2025_2,serbia,proofs,OPC R1 8B,0.0
5311,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,serbia_tst_bmo_2025_3,serbia,proofs,OPC R1 8B,0.0
5312,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,serbia_tst_bmo_2025_4,serbia,proofs,OPC R1 8B,0.0
5313,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,spain_2025_1,spain,proofs,OPC R1 8B,0.0
5314,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,spain_2025_2,spain,proofs,OPC R1 8B,1.0
5315,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,spain_2025_3,spain,proofs,OPC R1 8B,0.0
5316,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,spain_2025_4,spain,proofs,OPC R1 8B,0.0
5317,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,spain_2025_5,spain,proofs,OPC R1 8B,0.0
5318,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_1,thai,proofs,OPC R1 8B,1.0
5319,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_10,thai,proofs,OPC R1 8B,0.0
5320,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_2,thai,proofs,OPC R1 8B,1.0
5321,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_3,thai,proofs,OPC R1 8B,0.0
5322,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_4,thai,proofs,OPC R1 8B,0.0
5323,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_5,thai,proofs,OPC R1 8B,0.0
5324,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_6,thai,proofs,OPC R1 8B,1.0
5325,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_7,thai,proofs,OPC R1 8B,0.0
5326,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_8,thai,proofs,OPC R1 8B,0.0
5327,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,thai_2025_9,thai,proofs,OPC R1 8B,0.0
5328,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,turkey_tst_2025_1,turkey,proofs,OPC R1 8B,0.0
5329,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,turkey_tst_2025_2,turkey,proofs,OPC R1 8B,0.0
5330,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,turkey_tst_2025_3,turkey,proofs,OPC R1 8B,0.0
5331,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,turkey_tst_2025_4,turkey,proofs,OPC R1 8B,0.0
5332,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,turkey_tst_2025_5,turkey,proofs,OPC R1 8B,0.0
5333,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,turkey_tst_2025_6,turkey,proofs,OPC R1 8B,0.0
5334,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,turkey_tst_2025_7,turkey,proofs,OPC R1 8B,0.0
5335,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,turkey_tst_2025_8,turkey,proofs,OPC R1 8B,0.0
5336,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,turkey_tst_2025_9,turkey,proofs,OPC R1 8B,0.0
5337,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usamo_2025_1,usamo,proofs,OPC R1 8B,0.0
5338,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usamo_2025_2,usamo,proofs,OPC R1 8B,0.0
5339,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usamo_2025_3,usamo,proofs,OPC R1 8B,0.0
5340,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usamo_2025_4,usamo,proofs,OPC R1 8B,0.0
5341,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usamo_2025_5,usamo,proofs,OPC R1 8B,0.0
5342,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usamo_2025_6,usamo,proofs,OPC R1 8B,0.0
5343,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usatst_2025_1,usatst,proofs,OPC R1 8B,1.0
5344,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usatst_2025_2,usatst,proofs,OPC R1 8B,0.0
5345,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usatst_2025_3,usatst,proofs,OPC R1 8B,0.0
5346,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usatst_2025_4,usatst,proofs,OPC R1 8B,0.0
5347,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usatst_2025_5,usatst,proofs,OPC R1 8B,0.0
5348,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usatst_2025_6,usatst,proofs,OPC R1 8B,0.0
5349,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usatst_2025_7,usatst,proofs,OPC R1 8B,0.0
5350,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usatst_2025_8,usatst,proofs,OPC R1 8B,0.0
5351,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,usatst_2025_9,usatst,proofs,OPC R1 8B,0.0
5352,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,vietnam_2025_1,vietnam,proofs,OPC R1 8B,1.0
5353,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,vietnam_2025_2,vietnam,proofs,OPC R1 8B,0.0
5354,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,vietnam_2025_3,vietnam,proofs,OPC R1 8B,0.0
5355,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,vietnam_2025_4,vietnam,proofs,OPC R1 8B,0.0
5356,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,vietnam_2025_5,vietnam,proofs,OPC R1 8B,0.0
5357,Qwen3-4B (25/07)(Adv 0.9 - best checkpoint),trained/best_model,vietnam_2025_6,vietnam,proofs,OPC R1 8B,0.0