{
  "ArmorRM_scores": {
    "accuracy": 0.6863333333333334,
    "precision": 0.10987654320987654,
    "recall": 0.28802588996763756,
    "f1": 0.1590705987488829,
    "true_positives": 267,
    "true_negatives": 5910,
    "false_positives": 2163,
    "false_negatives": 660,
    "total": 9000,
    "normalized_precision": 0.221606277073309,
    "normalized_recall": 0.3343356005076027,
    "selection_accuracy": 0.15555555555555556,
    "solvable_selection_accuracy": 0.2978723404255319
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.359,
    "precision": 0.0960960960960961,
    "recall": 0.6213592233009708,
    "f1": 0.16644993498049415,
    "true_positives": 576,
    "true_negatives": 2655,
    "false_positives": 5418,
    "false_negatives": 351,
    "total": 9000,
    "normalized_precision": 0.22987860549646585,
    "normalized_recall": 0.6905557890297224,
    "selection_accuracy": 0.1,
    "solvable_selection_accuracy": 0.19148936170212766
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.8443333333333334,
    "precision": 0.14940828402366865,
    "recall": 0.10895361380798274,
    "f1": 0.12601372426699936,
    "true_positives": 101,
    "true_negatives": 7498,
    "false_positives": 575,
    "false_negatives": 826,
    "total": 9000,
    "normalized_precision": 0.24444088945768674,
    "normalized_recall": 0.09354003504155117,
    "selection_accuracy": 0.15555555555555556,
    "solvable_selection_accuracy": 0.2978723404255319
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.16555555555555557,
    "precision": 0.10566670660117407,
    "recall": 0.9514563106796117,
    "f1": 0.19020918697433686,
    "true_positives": 882,
    "true_negatives": 608,
    "false_positives": 7465,
    "false_negatives": 45,
    "total": 9000,
    "normalized_precision": 0.20023804443765883,
    "normalized_recall": 0.9351877394945697,
    "selection_accuracy": 0.16666666666666666,
    "solvable_selection_accuracy": 0.3191489361702128
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.22955555555555557,
    "precision": 0.1089701861736753,
    "recall": 0.9029126213592233,
    "f1": 0.1944702602230483,
    "true_positives": 837,
    "true_negatives": 1229,
    "false_positives": 6844,
    "false_negatives": 90,
    "total": 9000,
    "normalized_precision": 0.20319863759695495,
    "normalized_recall": 0.8159998925260739,
    "selection_accuracy": 0.12222222222222222,
    "solvable_selection_accuracy": 0.23404255319148937
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.6954444444444444,
    "precision": 0.0680952380952381,
    "recall": 0.15426105717367852,
    "f1": 0.0944829864552362,
    "true_positives": 143,
    "true_negatives": 6116,
    "false_positives": 1957,
    "false_negatives": 784,
    "total": 9000,
    "normalized_precision": 0.24268978115148962,
    "normalized_recall": 0.18684294420577427,
    "selection_accuracy": 0.13333333333333333,
    "solvable_selection_accuracy": 0.2553191489361702
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.2331111111111111,
    "precision": 0.10695961057755558,
    "recall": 0.8770226537216829,
    "f1": 0.19066604127579737,
    "true_positives": 813,
    "true_negatives": 1285,
    "false_positives": 6788,
    "false_negatives": 114,
    "total": 9000,
    "normalized_precision": 0.20568379908977022,
    "normalized_recall": 0.882349758286798,
    "selection_accuracy": 0.16666666666666666,
    "solvable_selection_accuracy": 0.3191489361702128
  },
  "GPM_scores": {
    "accuracy": 0.7862222222222223,
    "precision": 0.07062876830318691,
    "recall": 0.08845738942826321,
    "f1": 0.078544061302682,
    "true_positives": 82,
    "true_negatives": 6994,
    "false_positives": 1079,
    "false_negatives": 845,
    "total": 9000,
    "normalized_precision": 0.15231099803057727,
    "normalized_recall": 0.1395534455356034,
    "selection_accuracy": 0.1,
    "solvable_selection_accuracy": 0.19148936170212766
  },
  "GRMGemma_scores": {
    "accuracy": 0.7892222222222223,
    "precision": 0.11012861736334405,
    "recall": 0.14778856526429343,
    "f1": 0.12620912022109626,
    "true_positives": 137,
    "true_negatives": 6966,
    "false_positives": 1107,
    "false_negatives": 790,
    "total": 9000,
    "normalized_precision": 0.20975994180526158,
    "normalized_recall": 0.20249568000349852,
    "selection_accuracy": 0.12222222222222222,
    "solvable_selection_accuracy": 0.23404255319148937
  },
  "GRMLlama32_scores": {
    "accuracy": 0.798,
    "precision": 0.10886742756804214,
    "recall": 0.133764832793959,
    "f1": 0.12003872216844144,
    "true_positives": 124,
    "true_negatives": 7058,
    "false_positives": 1015,
    "false_negatives": 803,
    "total": 9000,
    "normalized_precision": 0.20661280736077467,
    "normalized_recall": 0.19230042570411027,
    "selection_accuracy": 0.16666666666666666,
    "solvable_selection_accuracy": 0.3191489361702128
  },
  "GRM_scores": {
    "accuracy": 0.8224444444444444,
    "precision": 0.12345679012345678,
    "recall": 0.1186623516720604,
    "f1": 0.121012101210121,
    "true_positives": 110,
    "true_negatives": 7292,
    "false_positives": 781,
    "false_negatives": 817,
    "total": 9000,
    "normalized_precision": 0.2332259712891753,
    "normalized_recall": 0.1484002145174897,
    "selection_accuracy": 0.1111111111111111,
    "solvable_selection_accuracy": 0.2127659574468085
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.7592222222222222,
    "precision": 0.10559796437659033,
    "recall": 0.1790722761596548,
    "f1": 0.13285314125650258,
    "true_positives": 166,
    "true_negatives": 6667,
    "false_positives": 1406,
    "false_negatives": 761,
    "total": 9000,
    "normalized_precision": 0.23434802840148625,
    "normalized_recall": 0.183713067111239,
    "selection_accuracy": 0.14444444444444443,
    "solvable_selection_accuracy": 0.2765957446808511
  },
  "OffsetBias_scores": {
    "accuracy": 0.7291111111111112,
    "precision": 0.11076764554353426,
    "recall": 0.2319309600862999,
    "f1": 0.14993026499302647,
    "true_positives": 215,
    "true_negatives": 6347,
    "false_positives": 1726,
    "false_negatives": 712,
    "total": 9000,
    "normalized_precision": 0.2141147042633452,
    "normalized_recall": 0.26325562440081285,
    "selection_accuracy": 0.07777777777777778,
    "solvable_selection_accuracy": 0.14893617021276595
  },
  "QRM_scores": {
    "accuracy": 0.768,
    "precision": 0.11066398390342053,
    "recall": 0.1779935275080906,
    "f1": 0.13647642679900746,
    "true_positives": 165,
    "true_negatives": 6747,
    "false_positives": 1326,
    "false_negatives": 762,
    "total": 9000,
    "normalized_precision": 0.2049373732879431,
    "normalized_recall": 0.2201094331018137,
    "selection_accuracy": 0.14444444444444443,
    "solvable_selection_accuracy": 0.2765957446808511
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.2782222222222222,
    "precision": 0.11213260899846775,
    "recall": 0.8683926645091694,
    "f1": 0.19861830742659758,
    "true_positives": 805,
    "true_negatives": 1699,
    "false_positives": 6374,
    "false_negatives": 122,
    "total": 9000,
    "normalized_precision": 0.20616054418550944,
    "normalized_recall": 0.8834482083804539,
    "selection_accuracy": 0.18888888888888888,
    "solvable_selection_accuracy": 0.3617021276595745
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.13733333333333334,
    "precision": 0.10438606642749682,
    "recall": 0.9730312837108953,
    "f1": 0.18854515050167225,
    "true_positives": 902,
    "true_negatives": 334,
    "false_positives": 7739,
    "false_negatives": 25,
    "total": 9000,
    "normalized_precision": 0.19874793553707615,
    "normalized_recall": 0.9600293328349466,
    "selection_accuracy": 0.1111111111111111,
    "solvable_selection_accuracy": 0.2127659574468085
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.8277777777777777,
    "precision": 0.19727891156462585,
    "recall": 0.21898597626752966,
    "f1": 0.20756646216768915,
    "true_positives": 203,
    "true_negatives": 7247,
    "false_positives": 826,
    "false_negatives": 724,
    "total": 9000,
    "normalized_precision": 0.2715840446228099,
    "normalized_recall": 0.19682625170579923,
    "selection_accuracy": 0.15555555555555556,
    "solvable_selection_accuracy": 0.2978723404255319
  },
  "Skyworks_scores": {
    "accuracy": 0.8048888888888889,
    "precision": 0.11797235023041475,
    "recall": 0.13807982740021574,
    "f1": 0.1272365805168986,
    "true_positives": 128,
    "true_negatives": 7116,
    "false_positives": 957,
    "false_negatives": 799,
    "total": 9000,
    "normalized_precision": 0.22343627767397609,
    "normalized_recall": 0.20079603768484958,
    "selection_accuracy": 0.12222222222222222,
    "solvable_selection_accuracy": 0.23404255319148937
  },
  "URM_scores": {
    "accuracy": 0.7903333333333333,
    "precision": 0.12264150943396226,
    "recall": 0.16828478964401294,
    "f1": 0.14188267394270124,
    "true_positives": 156,
    "true_negatives": 6957,
    "false_positives": 1116,
    "false_negatives": 771,
    "total": 9000,
    "normalized_precision": 0.22155104217439717,
    "normalized_recall": 0.21046586724211408,
    "selection_accuracy": 0.1111111111111111,
    "solvable_selection_accuracy": 0.2127659574468085
  }
}