{
  "ArmorRM_scores": {
    "accuracy": 0.48606,
    "precision": 0.4743080295971499,
    "recall": 0.2775083176333828,
    "f1": 0.3501504691095769,
    "true_positives": 6923,
    "true_negatives": 17380,
    "false_positives": 7673,
    "false_negatives": 18024,
    "total": 50000,
    "normalized_precision": 0.5093512129510307,
    "normalized_recall": 0.2918611985693735,
    "selection_accuracy": 0.526,
    "solvable_selection_accuracy": 0.530241935483871
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.45874,
    "precision": 0.46639776408562533,
    "recall": 0.5886479336192728,
    "f1": 0.5204401679868162,
    "true_positives": 14685,
    "true_negatives": 8252,
    "false_positives": 16801,
    "false_negatives": 10262,
    "total": 50000,
    "normalized_precision": 0.5661618418399903,
    "normalized_recall": 0.6807479102005486,
    "selection_accuracy": 0.682,
    "solvable_selection_accuracy": 0.6875
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.5507,
    "precision": 0.7655113393239196,
    "recall": 0.14342405900509078,
    "f1": 0.24158536173660577,
    "true_positives": 3578,
    "true_negatives": 23957,
    "false_positives": 1096,
    "false_negatives": 21369,
    "total": 50000,
    "normalized_precision": 0.6612402579104708,
    "normalized_recall": 0.1269020668648917,
    "selection_accuracy": 0.666,
    "solvable_selection_accuracy": 0.6713709677419355
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.5276,
    "precision": 0.5140539281099744,
    "recall": 0.9728223834529202,
    "f1": 0.6726627678150724,
    "true_positives": 24269,
    "true_negatives": 2111,
    "false_positives": 22942,
    "false_negatives": 678,
    "total": 50000,
    "normalized_precision": 0.5146883293645469,
    "normalized_recall": 0.9727970640650024,
    "selection_accuracy": 0.694,
    "solvable_selection_accuracy": 0.6995967741935484
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.52296,
    "precision": 0.5120613310422312,
    "recall": 0.931735278791037,
    "f1": 0.6609041796986068,
    "true_positives": 23244,
    "true_negatives": 2904,
    "false_positives": 22149,
    "false_negatives": 1703,
    "total": 50000,
    "normalized_precision": 0.5207508226697015,
    "normalized_recall": 0.9412139949793522,
    "selection_accuracy": 0.7,
    "solvable_selection_accuracy": 0.7056451612903226
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.47776,
    "precision": 0.3911825144778629,
    "recall": 0.08393794845071552,
    "f1": 0.13821782178217823,
    "true_positives": 2094,
    "true_negatives": 21794,
    "false_positives": 3259,
    "false_negatives": 22853,
    "total": 50000,
    "normalized_precision": 0.66353146426812,
    "normalized_recall": 0.12463191230427322,
    "selection_accuracy": 0.686,
    "solvable_selection_accuracy": 0.6915322580645161
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.53452,
    "precision": 0.5187711778829971,
    "recall": 0.9266845712911372,
    "f1": 0.6651704790677601,
    "true_positives": 23118,
    "true_negatives": 3608,
    "false_positives": 21445,
    "false_negatives": 1829,
    "total": 50000,
    "normalized_precision": 0.5198821126141518,
    "normalized_recall": 0.9291012681316541,
    "selection_accuracy": 0.708,
    "solvable_selection_accuracy": 0.7137096774193549
  },
  "GPM_scores": {
    "accuracy": 0.5153,
    "precision": 0.5096571180555556,
    "recall": 0.7531166072072795,
    "f1": 0.6079176845545292,
    "true_positives": 18788,
    "true_negatives": 6977,
    "false_positives": 18076,
    "false_negatives": 6159,
    "total": 50000,
    "normalized_precision": 0.503693040876277,
    "normalized_recall": 0.7383755363132519,
    "selection_accuracy": 0.504,
    "solvable_selection_accuracy": 0.5080645161290323
  },
  "GRMGemma_scores": {
    "accuracy": 0.49868,
    "precision": 0.4938993130318876,
    "recall": 0.19308934942077205,
    "f1": 0.27763688760806915,
    "true_positives": 4817,
    "true_negatives": 20117,
    "false_positives": 4936,
    "false_negatives": 20130,
    "total": 50000,
    "normalized_precision": 0.5119898281906512,
    "normalized_recall": 0.19480810332043202,
    "selection_accuracy": 0.528,
    "solvable_selection_accuracy": 0.532258064516129
  },
  "GRMLlama32_scores": {
    "accuracy": 0.50206,
    "precision": 0.5028512773722628,
    "recall": 0.17673467751633462,
    "f1": 0.26154530624351174,
    "true_positives": 4409,
    "true_negatives": 20694,
    "false_positives": 4359,
    "false_negatives": 20538,
    "total": 50000,
    "normalized_precision": 0.5187243905216444,
    "normalized_recall": 0.17762974987989666,
    "selection_accuracy": 0.538,
    "solvable_selection_accuracy": 0.5423387096774194
  },
  "GRM_scores": {
    "accuracy": 0.51504,
    "precision": 0.5396033994334277,
    "recall": 0.19088467551208563,
    "f1": 0.2820087646571124,
    "true_positives": 4762,
    "true_negatives": 20990,
    "false_positives": 4063,
    "false_negatives": 20185,
    "total": 50000,
    "normalized_precision": 0.53588526420932,
    "normalized_recall": 0.1844601014269252,
    "selection_accuracy": 0.528,
    "solvable_selection_accuracy": 0.532258064516129
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.5284,
    "precision": 0.5837725211422968,
    "recall": 0.19092476049224355,
    "f1": 0.28774240318975414,
    "true_positives": 4763,
    "true_negatives": 21657,
    "false_positives": 3396,
    "false_negatives": 20184,
    "total": 50000,
    "normalized_precision": 0.5709336902642328,
    "normalized_recall": 0.18703303097670262,
    "selection_accuracy": 0.594,
    "solvable_selection_accuracy": 0.5987903225806451
  },
  "OffsetBias_scores": {
    "accuracy": 0.47052,
    "precision": 0.453081791925275,
    "recall": 0.29554655870445345,
    "f1": 0.3577389616690927,
    "true_positives": 7373,
    "true_negatives": 16153,
    "false_positives": 8900,
    "false_negatives": 17574,
    "total": 50000,
    "normalized_precision": 0.49564058976276526,
    "normalized_recall": 0.3230286606125869,
    "selection_accuracy": 0.542,
    "solvable_selection_accuracy": 0.5463709677419355
  },
  "QRM_scores": {
    "accuracy": 0.50168,
    "precision": 0.5013105605817197,
    "recall": 0.23766384735639556,
    "f1": 0.32245608310219176,
    "true_positives": 5929,
    "true_negatives": 19155,
    "false_positives": 5898,
    "false_negatives": 19018,
    "total": 50000,
    "normalized_precision": 0.5231275523502433,
    "normalized_recall": 0.24264865242519265,
    "selection_accuracy": 0.53,
    "solvable_selection_accuracy": 0.5342741935483871
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.58878,
    "precision": 0.552186949693018,
    "recall": 0.9301318795847197,
    "f1": 0.6929773477280532,
    "true_positives": 23204,
    "true_negatives": 6235,
    "false_positives": 18818,
    "false_negatives": 1743,
    "total": 50000,
    "normalized_precision": 0.531318332765165,
    "normalized_recall": 0.8954345743871681,
    "selection_accuracy": 0.732,
    "solvable_selection_accuracy": 0.7379032258064516
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.51296,
    "precision": 0.5061562338334196,
    "recall": 0.9804786146630857,
    "f1": 0.6676493066928704,
    "true_positives": 24460,
    "true_negatives": 1188,
    "false_positives": 23865,
    "false_negatives": 487,
    "total": 50000,
    "normalized_precision": 0.5076791211049643,
    "normalized_recall": 0.9770268876873748,
    "selection_accuracy": 0.526,
    "solvable_selection_accuracy": 0.530241935483871
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.77484,
    "precision": 0.8625840970493193,
    "recall": 0.6527037319116527,
    "f1": 0.7431087988316903,
    "true_positives": 16283,
    "true_negatives": 22459,
    "false_positives": 2594,
    "false_negatives": 8664,
    "total": 50000,
    "normalized_precision": 0.680200374749716,
    "normalized_recall": 0.5107573281850202,
    "selection_accuracy": 0.732,
    "solvable_selection_accuracy": 0.7379032258064516
  },
  "Skyworks_scores": {
    "accuracy": 0.5083,
    "precision": 0.5220409157330735,
    "recall": 0.1718443099370666,
    "f1": 0.25857233330317564,
    "true_positives": 4287,
    "true_negatives": 21128,
    "false_positives": 3925,
    "false_negatives": 20660,
    "total": 50000,
    "normalized_precision": 0.5272376439773266,
    "normalized_recall": 0.1673062401965779,
    "selection_accuracy": 0.534,
    "solvable_selection_accuracy": 0.5383064516129032
  },
  "URM_scores": {
    "accuracy": 0.49008,
    "precision": 0.47429534600618034,
    "recall": 0.20303042449993988,
    "f1": 0.28434289563801723,
    "true_positives": 5065,
    "true_negatives": 19439,
    "false_positives": 5614,
    "false_negatives": 19882,
    "total": 50000,
    "normalized_precision": 0.5180849519195323,
    "normalized_recall": 0.21410877263252603,
    "selection_accuracy": 0.548,
    "solvable_selection_accuracy": 0.5524193548387096
  }
}