{
  "ArmorRM_scores": {
    "accuracy": 0.6652012383900929,
    "precision": 0.824317086234601,
    "recall": 0.2779684282772821,
    "f1": 0.4157436922578205,
    "true_positives": 7695,
    "true_negatives": 35277,
    "false_positives": 1640,
    "false_negatives": 19988,
    "total": 64600,
    "normalized_precision": 0.3817314659897154,
    "normalized_recall": 0.16930439122577767,
    "selection_accuracy": 0.5201238390092879,
    "solvable_selection_accuracy": 0.6424474187380497
  },
  "DecisionTreeReward27B_scores": {
    "accuracy": 0.6852631578947368,
    "precision": 0.8207522471419845,
    "recall": 0.3397391901166781,
    "f1": 0.4805579684226662,
    "true_positives": 9405,
    "true_negatives": 34863,
    "false_positives": 2054,
    "false_negatives": 18278,
    "total": 64600,
    "normalized_precision": 0.4932333410325353,
    "normalized_recall": 0.21344394882651552,
    "selection_accuracy": 0.4891640866873065,
    "solvable_selection_accuracy": 0.6042065009560229
  },
  "DecisionTreeReward8B_scores": {
    "accuracy": 0.686780185758514,
    "precision": 0.8284101931046645,
    "recall": 0.3393779575913015,
    "f1": 0.48149856498564986,
    "true_positives": 9395,
    "true_negatives": 34971,
    "false_positives": 1946,
    "false_negatives": 18288,
    "total": 64600,
    "normalized_precision": 0.4726445332630722,
    "normalized_recall": 0.21107966429984024,
    "selection_accuracy": 0.5077399380804953,
    "solvable_selection_accuracy": 0.627151051625239
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.5480434782608695,
    "precision": 0.42558309800230104,
    "recall": 0.1469855145757324,
    "f1": 0.21850499409300828,
    "true_positives": 4069,
    "true_negatives": 31225,
    "false_positives": 5492,
    "false_negatives": 23614,
    "total": 64400,
    "normalized_precision": 0.3894532288287386,
    "normalized_recall": 0.14944537431713342,
    "selection_accuracy": 0.4409937888198758,
    "solvable_selection_accuracy": 0.5430210325047801
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.5713198757763975,
    "precision": 0.5402542372881356,
    "recall": 0.01842285879420583,
    "f1": 0.03563069829182241,
    "true_positives": 510,
    "true_negatives": 36283,
    "false_positives": 434,
    "false_negatives": 27173,
    "total": 64400,
    "normalized_precision": 0.16998668537817305,
    "normalized_recall": 0.017239301223826754,
    "selection_accuracy": 0.4549689440993789,
    "solvable_selection_accuracy": 0.5602294455066922
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.42990683229813664,
    "precision": 0.42980833501733223,
    "recall": 0.9988079326662572,
    "f1": 0.6009955006846784,
    "true_positives": 27650,
    "true_negatives": 36,
    "false_positives": 36681,
    "false_negatives": 33,
    "total": 64400,
    "normalized_precision": 0.5294465732119032,
    "normalized_recall": 0.9991064788736781,
    "selection_accuracy": 0.4472049689440994,
    "solvable_selection_accuracy": 0.5506692160611855
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.45773291925465837,
    "precision": 0.44117408051487916,
    "recall": 0.9805656901347397,
    "f1": 0.6085504192261131,
    "true_positives": 27145,
    "true_negatives": 2333,
    "false_positives": 34384,
    "false_negatives": 538,
    "total": 64400,
    "normalized_precision": 0.5318016645269127,
    "normalized_recall": 0.9691945245085736,
    "selection_accuracy": 0.4751552795031056,
    "solvable_selection_accuracy": 0.5850860420650096
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.4298757763975155,
    "precision": 0.42986692339943167,
    "recall": 1.0,
    "f1": 0.6012684346560674,
    "true_positives": 27683,
    "true_negatives": 1,
    "false_positives": 36716,
    "false_negatives": 0,
    "total": 64400,
    "normalized_precision": 0.5293116634799232,
    "normalized_recall": 1.0,
    "selection_accuracy": 0.4301242236024845,
    "solvable_selection_accuracy": 0.5296367112810707
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.4595962732919255,
    "precision": 0.44093194603474883,
    "recall": 0.9598309431781238,
    "f1": 0.6042708996634222,
    "true_positives": 26571,
    "true_negatives": 3027,
    "false_positives": 33690,
    "false_negatives": 1112,
    "total": 64400,
    "normalized_precision": 0.532045703623051,
    "normalized_recall": 0.9481982164093289,
    "selection_accuracy": 0.4798136645962733,
    "solvable_selection_accuracy": 0.5908221797323135
  },
  "GPM_scores": {
    "accuracy": 0.5664860681114551,
    "precision": 0.3933774834437086,
    "recall": 0.021457212007369143,
    "f1": 0.04069468708251978,
    "true_positives": 594,
    "true_negatives": 36001,
    "false_positives": 916,
    "false_negatives": 27089,
    "total": 64600,
    "normalized_precision": 0.43231661051928744,
    "normalized_recall": 0.025086587638076845,
    "selection_accuracy": 0.43034055727554177,
    "solvable_selection_accuracy": 0.5315487571701721
  },
  "GRMGemma_scores": {
    "accuracy": 0.6777708978328173,
    "precision": 0.7995289191311175,
    "recall": 0.3310696095076401,
    "f1": 0.468247075052368,
    "true_positives": 9165,
    "true_negatives": 34619,
    "false_positives": 2298,
    "false_negatives": 18518,
    "total": 64600,
    "normalized_precision": 0.5042593510220258,
    "normalized_recall": 0.212226171287552,
    "selection_accuracy": 0.47678018575851394,
    "solvable_selection_accuracy": 0.5889101338432122
  },
  "GRMLlama32_scores": {
    "accuracy": 0.6337151702786378,
    "precision": 0.6392602341206622,
    "recall": 0.3333814976700502,
    "f1": 0.43822412155745494,
    "true_positives": 9229,
    "true_negatives": 31709,
    "false_positives": 5208,
    "false_negatives": 18454,
    "total": 64600,
    "normalized_precision": 0.48599091921482007,
    "normalized_recall": 0.23807982308198633,
    "selection_accuracy": 0.47832817337461303,
    "solvable_selection_accuracy": 0.5908221797323135
  },
  "GRM_scores": {
    "accuracy": 0.6130340557275542,
    "precision": 0.8459160010306622,
    "recall": 0.11859263808113282,
    "f1": 0.2080217969839057,
    "true_positives": 3283,
    "true_negatives": 36319,
    "false_positives": 598,
    "false_negatives": 24400,
    "total": 64600,
    "normalized_precision": 0.28972114285722106,
    "normalized_recall": 0.07227068076291714,
    "selection_accuracy": 0.5154798761609907,
    "solvable_selection_accuracy": 0.6367112810707457
  },
  "INFORM_scores": {
    "accuracy": 0.6491795665634675,
    "precision": 0.8666374525270231,
    "recall": 0.21431925730592782,
    "f1": 0.34365316111095023,
    "true_positives": 5933,
    "true_negatives": 36004,
    "false_positives": 913,
    "false_negatives": 21750,
    "total": 64600,
    "normalized_precision": 0.34985899534351017,
    "normalized_recall": 0.12691727154415458,
    "selection_accuracy": 0.5356037151702786,
    "solvable_selection_accuracy": 0.6615678776290631
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.6646130030959753,
    "precision": 0.8247867861384001,
    "recall": 0.27598164938771086,
    "f1": 0.41357657121203917,
    "true_positives": 7640,
    "true_negatives": 35294,
    "false_positives": 1623,
    "false_negatives": 20043,
    "total": 64600,
    "normalized_precision": 0.3745112017994039,
    "normalized_recall": 0.16713422096239644,
    "selection_accuracy": 0.4907120743034056,
    "solvable_selection_accuracy": 0.6061185468451242
  },
  "InternLM2RewardModel_scores": {
    "accuracy": 0.6650154798761609,
    "precision": 0.8268253109789075,
    "recall": 0.2761261423978615,
    "f1": 0.4139948006932409,
    "true_positives": 7644,
    "true_negatives": 35316,
    "false_positives": 1601,
    "false_negatives": 20039,
    "total": 64600,
    "normalized_precision": 0.4216837501539928,
    "normalized_recall": 0.16849952937675092,
    "selection_accuracy": 0.5123839009287926,
    "solvable_selection_accuracy": 0.6328871892925431
  },
  "LDLRewardGemma_scores": {
    "accuracy": 0.6906811145510836,
    "precision": 0.6341497404452496,
    "recall": 0.6575154426904598,
    "f1": 0.6456212535026425,
    "true_positives": 18202,
    "true_negatives": 26416,
    "false_positives": 10501,
    "false_negatives": 9481,
    "total": 64600,
    "normalized_precision": 0.5771151412235742,
    "normalized_recall": 0.5451930108381341,
    "selection_accuracy": 0.5046439628482973,
    "solvable_selection_accuracy": 0.6233269598470363
  },
  "OffsetBias_scores": {
    "accuracy": 0.6700773993808049,
    "precision": 0.8261314765513005,
    "recall": 0.2914424014738287,
    "f1": 0.430879329221074,
    "true_positives": 8068,
    "true_negatives": 35219,
    "false_positives": 1698,
    "false_negatives": 19615,
    "total": 64600,
    "normalized_precision": 0.4077505159509464,
    "normalized_recall": 0.17707372622527123,
    "selection_accuracy": 0.5201238390092879,
    "solvable_selection_accuracy": 0.6424474187380497
  },
  "QRMGemma_scores": {
    "accuracy": 0.6813157894736842,
    "precision": 0.8249679428466752,
    "recall": 0.32536213560669003,
    "f1": 0.4666718478795886,
    "true_positives": 9007,
    "true_negatives": 35006,
    "false_positives": 1911,
    "false_negatives": 18676,
    "total": 64600,
    "normalized_precision": 0.4589934691215987,
    "normalized_recall": 0.20118154600883748,
    "selection_accuracy": 0.5092879256965944,
    "solvable_selection_accuracy": 0.6290630975143403
  },
  "QRM_scores": {
    "accuracy": 0.6776934984520124,
    "precision": 0.8233741753063148,
    "recall": 0.31557273416898457,
    "f1": 0.4562713811923851,
    "true_positives": 8736,
    "true_negatives": 35043,
    "false_positives": 1874,
    "false_negatives": 18947,
    "total": 64600,
    "normalized_precision": 0.4477228153365588,
    "normalized_recall": 0.19580886941622827,
    "selection_accuracy": 0.5092879256965944,
    "solvable_selection_accuracy": 0.6290630975143403
  },
  "Qwen72B_scores": {
    "accuracy": 0.5541795665634675,
    "precision": 0.4809483199727102,
    "recall": 0.5093017375284471,
    "f1": 0.49471911295133164,
    "true_positives": 14099,
    "true_negatives": 21701,
    "false_positives": 15216,
    "false_negatives": 13584,
    "total": 64600,
    "normalized_precision": 0.5536028158832134,
    "normalized_recall": 0.4820268984744129,
    "selection_accuracy": 0.48761609907120745,
    "solvable_selection_accuracy": 0.6022944550669216
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.4311145510835913,
    "precision": 0.4295351042168581,
    "recall": 0.9982660838781924,
    "f1": 0.6006302977613561,
    "true_positives": 27635,
    "true_negatives": 215,
    "false_positives": 36702,
    "false_negatives": 48,
    "total": 64600,
    "normalized_precision": 0.5294547429976852,
    "normalized_recall": 0.9978745072728178,
    "selection_accuracy": 0.4040247678018576,
    "solvable_selection_accuracy": 0.49904397705544934
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.431640866873065,
    "precision": 0.429864745252108,
    "recall": 0.9999638767474623,
    "f1": 0.60125977410947,
    "true_positives": 27682,
    "true_negatives": 202,
    "false_positives": 36715,
    "false_negatives": 1,
    "total": 64600,
    "normalized_precision": 0.5293213794005923,
    "normalized_recall": 0.9999705839093984,
    "selection_accuracy": 0.44891640866873067,
    "solvable_selection_accuracy": 0.5544933078393881
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.5131578947368421,
    "precision": 0.4499401993355482,
    "recall": 0.6115305422100206,
    "f1": 0.5184357199730507,
    "true_positives": 16929,
    "true_negatives": 16221,
    "false_positives": 20696,
    "false_negatives": 10754,
    "total": 64600,
    "normalized_precision": 0.5231564166743803,
    "normalized_recall": 0.581340742919108,
    "selection_accuracy": 0.42105263157894735,
    "solvable_selection_accuracy": 0.5200764818355641
  },
  "SkyworksGemma_scores": {
    "accuracy": 0.6664086687306502,
    "precision": 0.8248066942061222,
    "recall": 0.2812917675107467,
    "f1": 0.41951298351470745,
    "true_positives": 7787,
    "true_negatives": 35263,
    "false_positives": 1654,
    "false_negatives": 19896,
    "total": 64600,
    "normalized_precision": 0.3949557067813011,
    "normalized_recall": 0.17067923449821418,
    "selection_accuracy": 0.521671826625387,
    "solvable_selection_accuracy": 0.6443594646271511
  },
  "Skyworks_scores": {
    "accuracy": 0.5900619195046439,
    "precision": 0.8441260744985674,
    "recall": 0.053209550987970956,
    "f1": 0.1001087399755335,
    "true_positives": 1473,
    "true_negatives": 36645,
    "false_positives": 272,
    "false_negatives": 26210,
    "total": 64600,
    "normalized_precision": 0.2001532671704756,
    "normalized_recall": 0.03361554212991991,
    "selection_accuracy": 0.47987616099071206,
    "solvable_selection_accuracy": 0.5927342256214149
  },
  "URM_scores": {
    "accuracy": 0.5612848297213622,
    "precision": 0.4070096099491238,
    "recall": 0.05201748365422822,
    "f1": 0.09224560392043817,
    "true_positives": 1440,
    "true_negatives": 34819,
    "false_positives": 2098,
    "false_negatives": 26243,
    "total": 64600,
    "normalized_precision": 0.32611823075448065,
    "normalized_recall": 0.050121203666751345,
    "selection_accuracy": 0.42569659442724456,
    "solvable_selection_accuracy": 0.5258126195028681
  }
}