{
  "ArmorRM_scores": {
    "accuracy": 0.4038888888888889,
    "precision": 0.32368821819188703,
    "recall": 0.829666430092264,
    "f1": 0.46569066826013344,
    "true_positives": 2338,
    "true_negatives": 1297,
    "false_positives": 4885,
    "false_negatives": 480,
    "total": 9000,
    "normalized_precision": 0.5345397530718221,
    "normalized_recall": 0.8207603229970571,
    "selection_accuracy": 0.2777777777777778,
    "solvable_selection_accuracy": 0.4807692307692308
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.56,
    "precision": 0.28501506024096385,
    "recall": 0.2686302342086586,
    "f1": 0.27658019729630984,
    "true_positives": 757,
    "true_negatives": 4283,
    "false_positives": 1899,
    "false_negatives": 2061,
    "total": 9000,
    "normalized_precision": 0.5420849515990185,
    "normalized_recall": 0.24992262893110742,
    "selection_accuracy": 0.32222222222222224,
    "solvable_selection_accuracy": 0.5576923076923077
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.6556666666666666,
    "precision": 0.31090174966352624,
    "recall": 0.08197303051809794,
    "f1": 0.12973883740522324,
    "true_positives": 231,
    "true_negatives": 5670,
    "false_positives": 512,
    "false_negatives": 2587,
    "total": 9000,
    "normalized_precision": 0.5142828886246599,
    "normalized_recall": 0.07797704333738609,
    "selection_accuracy": 0.34444444444444444,
    "solvable_selection_accuracy": 0.5961538461538461
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.3263333333333333,
    "precision": 0.30682224074294556,
    "recall": 0.9144783534421576,
    "f1": 0.4594811446910938,
    "true_positives": 2577,
    "true_negatives": 360,
    "false_positives": 5822,
    "false_negatives": 241,
    "total": 9000,
    "normalized_precision": 0.545789787177031,
    "normalized_recall": 0.9330475195092625,
    "selection_accuracy": 0.35555555555555557,
    "solvable_selection_accuracy": 0.6153846153846154
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.3507777777777778,
    "precision": 0.31185470829705186,
    "recall": 0.8896380411639461,
    "f1": 0.4618218660771852,
    "true_positives": 2507,
    "true_negatives": 650,
    "false_positives": 5532,
    "false_negatives": 311,
    "total": 9000,
    "normalized_precision": 0.5517750835548454,
    "normalized_recall": 0.8741749645796587,
    "selection_accuracy": 0.37777777777777777,
    "solvable_selection_accuracy": 0.6538461538461539
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.544,
    "precision": 0.2644688644688645,
    "recall": 0.25621007806955287,
    "f1": 0.26027397260273977,
    "true_positives": 722,
    "true_negatives": 4174,
    "false_positives": 2008,
    "false_negatives": 2096,
    "total": 9000,
    "normalized_precision": 0.5616931410287067,
    "normalized_recall": 0.3129683742360102,
    "selection_accuracy": 0.36666666666666664,
    "solvable_selection_accuracy": 0.6346153846153846
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.3665555555555556,
    "precision": 0.3191115572844774,
    "recall": 0.9024130589070263,
    "f1": 0.4714934643552425,
    "true_positives": 2543,
    "true_negatives": 756,
    "false_positives": 5426,
    "false_negatives": 275,
    "total": 9000,
    "normalized_precision": 0.5517440829364164,
    "normalized_recall": 0.8933673916394097,
    "selection_accuracy": 0.36666666666666664,
    "solvable_selection_accuracy": 0.6346153846153846
  },
  "GPM_scores": {
    "accuracy": 0.6273333333333333,
    "precision": 0.14736842105263157,
    "recall": 0.0397444996451384,
    "f1": 0.0626048071548351,
    "true_positives": 112,
    "true_negatives": 5534,
    "false_positives": 648,
    "false_negatives": 2706,
    "total": 9000,
    "normalized_precision": 0.5830085346752014,
    "normalized_recall": 0.06385352865343125,
    "selection_accuracy": 0.3333333333333333,
    "solvable_selection_accuracy": 0.5769230769230769
  },
  "GRMGemma_scores": {
    "accuracy": 0.434,
    "precision": 0.3306547619047619,
    "recall": 0.7885024840312278,
    "f1": 0.4659257706018033,
    "true_positives": 2222,
    "true_negatives": 1684,
    "false_positives": 4498,
    "false_negatives": 596,
    "total": 9000,
    "normalized_precision": 0.5649751561552518,
    "normalized_recall": 0.7436768892201999,
    "selection_accuracy": 0.35555555555555557,
    "solvable_selection_accuracy": 0.6153846153846154
  },
  "GRMLlama32_scores": {
    "accuracy": 0.4318888888888889,
    "precision": 0.33057729218957627,
    "recall": 0.7945351312987935,
    "f1": 0.46689604837868837,
    "true_positives": 2239,
    "true_negatives": 1648,
    "false_positives": 4534,
    "false_negatives": 579,
    "total": 9000,
    "normalized_precision": 0.5636593183223518,
    "normalized_recall": 0.7539384662539562,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "GRM_scores": {
    "accuracy": 0.4681111111111111,
    "precision": 0.32809498865025316,
    "recall": 0.6667849538679915,
    "f1": 0.4397893504973669,
    "true_positives": 1879,
    "true_negatives": 2334,
    "false_positives": 3848,
    "false_negatives": 939,
    "total": 9000,
    "normalized_precision": 0.563134461531642,
    "normalized_recall": 0.6303091405257996,
    "selection_accuracy": 0.2777777777777778,
    "solvable_selection_accuracy": 0.4807692307692308
  },
  "INFORM_scores": {
    "accuracy": 0.4712222222222222,
    "precision": 0.33886767391665285,
    "recall": 0.7242725337118524,
    "f1": 0.46171247596425746,
    "true_positives": 2041,
    "true_negatives": 2200,
    "false_positives": 3982,
    "false_negatives": 777,
    "total": 9000,
    "normalized_precision": 0.5594954159414258,
    "normalized_recall": 0.6729378368478556,
    "selection_accuracy": 0.34444444444444444,
    "solvable_selection_accuracy": 0.5961538461538461
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.5954444444444444,
    "precision": 0.34865759470393526,
    "recall": 0.33640880056777855,
    "f1": 0.3424236951417735,
    "true_positives": 948,
    "true_negatives": 4411,
    "false_positives": 1771,
    "false_negatives": 1870,
    "total": 9000,
    "normalized_precision": 0.5612719947023813,
    "normalized_recall": 0.30975740499489823,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "InternLM2RewardModel_scores": {
    "accuracy": 0.497,
    "precision": 0.33028798411122146,
    "recall": 0.5901348474095103,
    "f1": 0.42353240799694386,
    "true_positives": 1663,
    "true_negatives": 2810,
    "false_positives": 3372,
    "false_negatives": 1155,
    "total": 9000,
    "normalized_precision": 0.5566802432070024,
    "normalized_recall": 0.5540082868238954,
    "selection_accuracy": 0.26666666666666666,
    "solvable_selection_accuracy": 0.46153846153846156
  },
  "LDLRewardGemma_scores": {
    "accuracy": 0.572,
    "precision": 0.26262626262626265,
    "recall": 0.20298083747338538,
    "f1": 0.2289831865492394,
    "true_positives": 572,
    "true_negatives": 4576,
    "false_positives": 1606,
    "false_negatives": 2246,
    "total": 9000,
    "normalized_precision": 0.48029529896428785,
    "normalized_recall": 0.25731167498514645,
    "selection_accuracy": 0.26666666666666666,
    "solvable_selection_accuracy": 0.46153846153846156
  },
  "OffsetBias_scores": {
    "accuracy": 0.43577777777777776,
    "precision": 0.3297680024103646,
    "recall": 0.776792051100071,
    "f1": 0.46298646362098134,
    "true_positives": 2189,
    "true_negatives": 1733,
    "false_positives": 4449,
    "false_negatives": 629,
    "total": 9000,
    "normalized_precision": 0.5643456975240063,
    "normalized_recall": 0.7382773039295075,
    "selection_accuracy": 0.3,
    "solvable_selection_accuracy": 0.5192307692307693
  },
  "QRMGemma_scores": {
    "accuracy": 0.4368888888888889,
    "precision": 0.3327386262265834,
    "recall": 0.794180269694819,
    "f1": 0.4689857502095557,
    "true_positives": 2238,
    "true_negatives": 1694,
    "false_positives": 4488,
    "false_negatives": 580,
    "total": 9000,
    "normalized_precision": 0.5654387468977473,
    "normalized_recall": 0.7426123842010408,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "QRM_scores": {
    "accuracy": 0.42933333333333334,
    "precision": 0.33010847258868364,
    "recall": 0.7991483321504613,
    "f1": 0.4672199170124481,
    "true_positives": 2252,
    "true_negatives": 1612,
    "false_positives": 4570,
    "false_negatives": 566,
    "total": 9000,
    "normalized_precision": 0.5638890003526262,
    "normalized_recall": 0.752852568368409,
    "selection_accuracy": 0.3333333333333333,
    "solvable_selection_accuracy": 0.5769230769230769
  },
  "Qwen72B_scores": {
    "accuracy": 0.6171111111111112,
    "precision": 0.43450146015853147,
    "recall": 0.7391767210787793,
    "f1": 0.5472937467157121,
    "true_positives": 2083,
    "true_negatives": 3471,
    "false_positives": 2711,
    "false_negatives": 735,
    "total": 9000,
    "normalized_precision": 0.6432826686690232,
    "normalized_recall": 0.7263856301681163,
    "selection_accuracy": 0.4222222222222222,
    "solvable_selection_accuracy": 0.7307692307692307
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.42055555555555557,
    "precision": 0.3147890588780714,
    "recall": 0.7228530872959545,
    "f1": 0.4385832705350414,
    "true_positives": 2037,
    "true_negatives": 1748,
    "false_positives": 4434,
    "false_negatives": 781,
    "total": 9000,
    "normalized_precision": 0.5450510923585383,
    "normalized_recall": 0.7345417397072065,
    "selection_accuracy": 0.35555555555555557,
    "solvable_selection_accuracy": 0.6153846153846154
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.36822222222222223,
    "precision": 0.3174643584521385,
    "recall": 0.8850248403122782,
    "f1": 0.467303728686528,
    "true_positives": 2494,
    "true_negatives": 820,
    "false_positives": 5362,
    "false_negatives": 324,
    "total": 9000,
    "normalized_precision": 0.5422382686130492,
    "normalized_recall": 0.8940436405338658,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.6934444444444444,
    "precision": 0.514713216957606,
    "recall": 0.36621717530163234,
    "f1": 0.42794940908148454,
    "true_positives": 1032,
    "true_negatives": 5209,
    "false_positives": 973,
    "false_negatives": 1786,
    "total": 9000,
    "normalized_precision": 0.5103618286137935,
    "normalized_recall": 0.3130009421056391,
    "selection_accuracy": 0.3111111111111111,
    "solvable_selection_accuracy": 0.5384615384615384
  },
  "SkyworksGemma_scores": {
    "accuracy": 0.44755555555555554,
    "precision": 0.3340523882896764,
    "recall": 0.7693399574166075,
    "f1": 0.46583584013751606,
    "true_positives": 2168,
    "true_negatives": 1860,
    "false_positives": 4322,
    "false_negatives": 650,
    "total": 9000,
    "normalized_precision": 0.5652372667840345,
    "normalized_recall": 0.7148367800964528,
    "selection_accuracy": 0.32222222222222224,
    "solvable_selection_accuracy": 0.5576923076923077
  },
  "Skyworks_scores": {
    "accuracy": 0.5254444444444445,
    "precision": 0.3414101724514298,
    "recall": 0.5550035486160397,
    "f1": 0.42275983240978504,
    "true_positives": 1564,
    "true_negatives": 3165,
    "false_positives": 3017,
    "false_negatives": 1254,
    "total": 9000,
    "normalized_precision": 0.5693567035677997,
    "normalized_recall": 0.5243340296540822,
    "selection_accuracy": 0.34444444444444444,
    "solvable_selection_accuracy": 0.5961538461538461
  },
  "URM_scores": {
    "accuracy": 0.4552222222222222,
    "precision": 0.34283129805517865,
    "recall": 0.8069552874378992,
    "f1": 0.4812189186329489,
    "true_positives": 2274,
    "true_negatives": 1823,
    "false_positives": 4359,
    "false_negatives": 544,
    "total": 9000,
    "normalized_precision": 0.5311379809334885,
    "normalized_recall": 0.7427761795939856,
    "selection_accuracy": 0.2777777777777778,
    "solvable_selection_accuracy": 0.4807692307692308
  }
}