{
  "ArmorRM_scores": {
    "accuracy": 0.35977777777777775,
    "precision": 0.32079376673971266,
    "recall": 0.9350603264726757,
    "f1": 0.47770123277737486,
    "true_positives": 2635,
    "true_negatives": 603,
    "false_positives": 5579,
    "false_negatives": 183,
    "total": 9000,
    "normalized_precision": 0.5475609352639645,
    "normalized_recall": 0.914222683955921,
    "selection_accuracy": 0.2777777777777778,
    "solvable_selection_accuracy": 0.4807692307692308
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.6173333333333333,
    "precision": 0.31076178960096734,
    "recall": 0.1823988644428673,
    "f1": 0.2298747763864043,
    "true_positives": 514,
    "true_negatives": 5042,
    "false_positives": 1140,
    "false_negatives": 2304,
    "total": 9000,
    "normalized_precision": 0.5092315428197374,
    "normalized_recall": 0.19244561065391108,
    "selection_accuracy": 0.32222222222222224,
    "solvable_selection_accuracy": 0.5576923076923077
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.6895555555555556,
    "precision": 0.6395348837209303,
    "recall": 0.019517388218594747,
    "f1": 0.03787878787878788,
    "true_positives": 55,
    "true_negatives": 6151,
    "false_positives": 31,
    "false_negatives": 2763,
    "total": 9000,
    "normalized_precision": 0.23513313609467457,
    "normalized_recall": 0.014222951448883213,
    "selection_accuracy": 0.34444444444444444,
    "solvable_selection_accuracy": 0.5961538461538461
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.31277777777777777,
    "precision": 0.31279884354497944,
    "recall": 0.9982256919801278,
    "f1": 0.4763356193379054,
    "true_positives": 2813,
    "true_negatives": 2,
    "false_positives": 6180,
    "false_negatives": 5,
    "total": 9000,
    "normalized_precision": 0.5419043675622622,
    "normalized_recall": 0.9990087232355273,
    "selection_accuracy": 0.35555555555555557,
    "solvable_selection_accuracy": 0.6153846153846154
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.328,
    "precision": 0.31325161887141534,
    "recall": 0.9613200851667849,
    "f1": 0.47252747252747246,
    "true_positives": 2709,
    "true_negatives": 243,
    "false_positives": 5939,
    "false_negatives": 109,
    "total": 9000,
    "normalized_precision": 0.5448674623586098,
    "normalized_recall": 0.9665594506203323,
    "selection_accuracy": 0.37777777777777777,
    "solvable_selection_accuracy": 0.6538461538461539
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.6873333333333334,
    "precision": 1.0,
    "recall": 0.0014194464158978,
    "f1": 0.002834868887313962,
    "true_positives": 4,
    "true_negatives": 6182,
    "false_positives": 0,
    "false_negatives": 2814,
    "total": 9000,
    "normalized_precision": 0.057692307692307696,
    "normalized_recall": 0.0013964676894300462,
    "selection_accuracy": 0.36666666666666664,
    "solvable_selection_accuracy": 0.6346153846153846
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.31322222222222224,
    "precision": 0.31297964631297964,
    "recall": 0.9985805535841022,
    "f1": 0.47658565500889155,
    "true_positives": 2814,
    "true_negatives": 5,
    "false_positives": 6177,
    "false_negatives": 4,
    "total": 9000,
    "normalized_precision": 0.5420855186480187,
    "normalized_recall": 0.9992069785884219,
    "selection_accuracy": 0.36666666666666664,
    "solvable_selection_accuracy": 0.6346153846153846
  },
  "GPM_scores": {
    "accuracy": 0.6782222222222222,
    "precision": 0.3211009174311927,
    "recall": 0.0248403122782115,
    "f1": 0.04611330698287221,
    "true_positives": 70,
    "true_negatives": 6034,
    "false_positives": 148,
    "false_negatives": 2748,
    "total": 9000,
    "normalized_precision": 0.5724358974358974,
    "normalized_recall": 0.021652673952576543,
    "selection_accuracy": 0.3333333333333333,
    "solvable_selection_accuracy": 0.5769230769230769
  },
  "GRMGemma_scores": {
    "accuracy": 0.4325555555555556,
    "precision": 0.3292555572131881,
    "recall": 0.783179559971611,
    "f1": 0.46360676399537853,
    "true_positives": 2207,
    "true_negatives": 1686,
    "false_positives": 4496,
    "false_negatives": 611,
    "total": 9000,
    "normalized_precision": 0.565035162318597,
    "normalized_recall": 0.7358699031979163,
    "selection_accuracy": 0.35555555555555557,
    "solvable_selection_accuracy": 0.6153846153846154
  },
  "GRMLlama32_scores": {
    "accuracy": 0.43777777777777777,
    "precision": 0.3225704336815448,
    "recall": 0.723207948899929,
    "f1": 0.44614711033274956,
    "true_positives": 2038,
    "true_negatives": 1902,
    "false_positives": 4280,
    "false_negatives": 780,
    "total": 9000,
    "normalized_precision": 0.5599333095714822,
    "normalized_recall": 0.6936076541080504,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "GRM_scores": {
    "accuracy": 0.6102222222222222,
    "precision": 0.3525641025641026,
    "recall": 0.2927608232789212,
    "f1": 0.31989143078712684,
    "true_positives": 825,
    "true_negatives": 4667,
    "false_positives": 1515,
    "false_negatives": 1993,
    "total": 9000,
    "normalized_precision": 0.4722533914845587,
    "normalized_recall": 0.26224480393679966,
    "selection_accuracy": 0.2777777777777778,
    "solvable_selection_accuracy": 0.4807692307692308
  },
  "INFORM_scores": {
    "accuracy": 0.5248888888888888,
    "precision": 0.3420026007802341,
    "recall": 0.559971611071682,
    "f1": 0.42465016146393975,
    "true_positives": 1578,
    "true_negatives": 3146,
    "false_positives": 3036,
    "false_negatives": 1240,
    "total": 9000,
    "normalized_precision": 0.5692262014352157,
    "normalized_recall": 0.518274236353432,
    "selection_accuracy": 0.34444444444444444,
    "solvable_selection_accuracy": 0.5961538461538461
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.7108888888888889,
    "precision": 0.7186234817813765,
    "recall": 0.12597586941092973,
    "f1": 0.21437198067632846,
    "true_positives": 355,
    "true_negatives": 6043,
    "false_positives": 139,
    "false_negatives": 2463,
    "total": 9000,
    "normalized_precision": 0.30335129613975764,
    "normalized_recall": 0.08908604504225194,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "InternLM2RewardModel_scores": {
    "accuracy": 0.5558888888888889,
    "precision": 0.3498089171974522,
    "recall": 0.4872249822569198,
    "f1": 0.40723713480646595,
    "true_positives": 1373,
    "true_negatives": 3630,
    "false_positives": 2552,
    "false_negatives": 1445,
    "total": 9000,
    "normalized_precision": 0.5459815277151872,
    "normalized_recall": 0.46507518722576713,
    "selection_accuracy": 0.26666666666666666,
    "solvable_selection_accuracy": 0.46153846153846156
  },
  "LDLRewardGemma_scores": {
    "accuracy": 0.5868888888888889,
    "precision": 0.27941176470588236,
    "recall": 0.20227111426543648,
    "f1": 0.23466447097571017,
    "true_positives": 570,
    "true_negatives": 4712,
    "false_positives": 1470,
    "false_negatives": 2248,
    "total": 9000,
    "normalized_precision": 0.471198118098499,
    "normalized_recall": 0.24643143400478118,
    "selection_accuracy": 0.26666666666666666,
    "solvable_selection_accuracy": 0.46153846153846156
  },
  "OffsetBias_scores": {
    "accuracy": 0.442,
    "precision": 0.32351057014734147,
    "recall": 0.716820440028389,
    "f1": 0.4458177002869125,
    "true_positives": 2020,
    "true_negatives": 1958,
    "false_positives": 4224,
    "false_negatives": 798,
    "total": 9000,
    "normalized_precision": 0.5660346238118896,
    "normalized_recall": 0.685782067147472,
    "selection_accuracy": 0.3,
    "solvable_selection_accuracy": 0.5192307692307693
  },
  "QRMGemma_scores": {
    "accuracy": 0.42633333333333334,
    "precision": 0.32703938634016816,
    "recall": 0.7867281760113556,
    "f1": 0.46201938105658014,
    "true_positives": 2217,
    "true_negatives": 1620,
    "false_positives": 4562,
    "false_negatives": 601,
    "total": 9000,
    "normalized_precision": 0.5653917062262488,
    "normalized_recall": 0.7406660866777629,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "QRM_scores": {
    "accuracy": 0.42077777777777775,
    "precision": 0.32717563862029153,
    "recall": 0.8044712562100781,
    "f1": 0.4651687698779111,
    "true_positives": 2267,
    "true_negatives": 1520,
    "false_positives": 4662,
    "false_negatives": 551,
    "total": 9000,
    "normalized_precision": 0.5650892676992967,
    "normalized_recall": 0.7571928435524059,
    "selection_accuracy": 0.3333333333333333,
    "solvable_selection_accuracy": 0.5769230769230769
  },
  "Qwen72B_scores": {
    "accuracy": 0.7868888888888889,
    "precision": 0.7279635258358662,
    "recall": 0.5099361249112846,
    "f1": 0.5997495826377295,
    "true_positives": 1437,
    "true_negatives": 5645,
    "false_positives": 537,
    "false_negatives": 1381,
    "total": 9000,
    "normalized_precision": 0.5753790608901345,
    "normalized_recall": 0.43051181230192476,
    "selection_accuracy": 0.4222222222222222,
    "solvable_selection_accuracy": 0.7307692307692307
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.32944444444444443,
    "precision": 0.31346399165023775,
    "recall": 0.9591909155429382,
    "f1": 0.47251114413075784,
    "true_positives": 2703,
    "true_negatives": 262,
    "false_positives": 5920,
    "false_negatives": 115,
    "total": 9000,
    "normalized_precision": 0.5412652630169127,
    "normalized_recall": 0.9560168605483136,
    "selection_accuracy": 0.35555555555555557,
    "solvable_selection_accuracy": 0.6153846153846154
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.31344444444444447,
    "precision": 0.3132155162832055,
    "recall": 1.0,
    "f1": 0.47702073635209474,
    "true_positives": 2818,
    "true_negatives": 3,
    "false_positives": 6179,
    "false_negatives": 0,
    "total": 9000,
    "normalized_precision": 0.5419560994560995,
    "normalized_recall": 1.0,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.7376666666666667,
    "precision": 0.6714178544636159,
    "recall": 0.3176011355571327,
    "f1": 0.4312213924355577,
    "true_positives": 895,
    "true_negatives": 5744,
    "false_positives": 438,
    "false_negatives": 1923,
    "total": 9000,
    "normalized_precision": 0.4376644927668981,
    "normalized_recall": 0.26557797851666154,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "SkyworksGemma_scores": {
    "accuracy": 0.4398888888888889,
    "precision": 0.32432432432432434,
    "recall": 0.7281760113555713,
    "f1": 0.44876981957353745,
    "true_positives": 2052,
    "true_negatives": 1907,
    "false_positives": 4275,
    "false_negatives": 766,
    "total": 9000,
    "normalized_precision": 0.5628056292388549,
    "normalized_recall": 0.6878051054954256,
    "selection_accuracy": 0.32222222222222224,
    "solvable_selection_accuracy": 0.5576923076923077
  },
  "Skyworks_scores": {
    "accuracy": 0.5927777777777777,
    "precision": 0.3720157147174373,
    "recall": 0.4368346344925479,
    "f1": 0.40182797453892605,
    "true_positives": 1231,
    "true_negatives": 4104,
    "false_positives": 2078,
    "false_negatives": 1587,
    "total": 9000,
    "normalized_precision": 0.5424061503832882,
    "normalized_recall": 0.3987314717932912,
    "selection_accuracy": 0.34444444444444444,
    "solvable_selection_accuracy": 0.5961538461538461
  },
  "URM_scores": {
    "accuracy": 0.38755555555555554,
    "precision": 0.31821862348178137,
    "recall": 0.836763662171753,
    "f1": 0.46108721157606575,
    "true_positives": 2358,
    "true_negatives": 1130,
    "false_positives": 5052,
    "false_negatives": 460,
    "total": 9000,
    "normalized_precision": 0.5515278223685731,
    "normalized_recall": 0.8011620404706546,
    "selection_accuracy": 0.2777777777777778,
    "solvable_selection_accuracy": 0.4807692307692308
  }
}