{
  "ArmorRM_scores": {
    "accuracy": 0.75336,
    "precision": 0.788308850572226,
    "recall": 0.9346671796844941,
    "f1": 0.8552718054642757,
    "true_positives": 36438,
    "true_negatives": 1230,
    "false_positives": 9785,
    "false_negatives": 2547,
    "total": 50000,
    "normalized_precision": 0.7928522541338116,
    "normalized_recall": 0.9228224857020941,
    "selection_accuracy": 0.758,
    "solvable_selection_accuracy": 0.768762677484787
  },
  "DecisionTreeReward27B_scores": {
    "accuracy": 0.66882,
    "precision": 0.7884595595801606,
    "recall": 0.7861741695523919,
    "f1": 0.7873152060829469,
    "true_positives": 30649,
    "true_negatives": 2792,
    "false_positives": 8223,
    "false_negatives": 8336,
    "total": 50000,
    "normalized_precision": 0.7946415929627411,
    "normalized_recall": 0.771859366503759,
    "selection_accuracy": 0.8,
    "solvable_selection_accuracy": 0.8113590263691683
  },
  "DecisionTreeReward8B_scores": {
    "accuracy": 0.6734,
    "precision": 0.7933445552246536,
    "recall": 0.7858150570732333,
    "f1": 0.7895618556701032,
    "true_positives": 30635,
    "true_negatives": 3035,
    "false_positives": 7980,
    "false_negatives": 8350,
    "total": 50000,
    "normalized_precision": 0.7949462948271215,
    "normalized_recall": 0.7620625258352581,
    "selection_accuracy": 0.8,
    "solvable_selection_accuracy": 0.8113590263691683
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.77988,
    "precision": 0.7804799807526516,
    "recall": 0.9985378991919969,
    "f1": 0.8761450338727466,
    "true_positives": 38928,
    "true_negatives": 66,
    "false_positives": 10949,
    "false_negatives": 57,
    "total": 50000,
    "normalized_precision": 0.7911718155318636,
    "normalized_recall": 0.9982937278976919,
    "selection_accuracy": 0.79,
    "solvable_selection_accuracy": 0.8012170385395537
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.2274,
    "precision": 0.9256594724220624,
    "recall": 0.00990124406823137,
    "f1": 0.019592914065275874,
    "true_positives": 386,
    "true_negatives": 10984,
    "false_positives": 31,
    "false_negatives": 38599,
    "total": 50000,
    "normalized_precision": 0.27232203226118035,
    "normalized_recall": 0.008787774076935715,
    "selection_accuracy": 0.806,
    "solvable_selection_accuracy": 0.8174442190669371
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.77958,
    "precision": 0.7798302845935235,
    "recall": 0.9994869821726305,
    "f1": 0.8761003248979775,
    "true_positives": 38965,
    "true_negatives": 14,
    "false_positives": 11001,
    "false_negatives": 20,
    "total": 50000,
    "normalized_precision": 0.7908475339849939,
    "normalized_recall": 0.9993979083303216,
    "selection_accuracy": 0.806,
    "solvable_selection_accuracy": 0.8174442190669371
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.77966,
    "precision": 0.7801955598300874,
    "recall": 0.9987944081056817,
    "f1": 0.8760644820177067,
    "true_positives": 38938,
    "true_negatives": 45,
    "false_positives": 10970,
    "false_negatives": 47,
    "total": 50000,
    "normalized_precision": 0.7911124061658519,
    "normalized_recall": 0.9987563481682528,
    "selection_accuracy": 0.856,
    "solvable_selection_accuracy": 0.8681541582150102
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.23222,
    "precision": 0.9641744548286605,
    "recall": 0.01587790175708606,
    "f1": 0.03124132535897242,
    "true_positives": 619,
    "true_negatives": 10992,
    "false_positives": 23,
    "false_negatives": 38366,
    "total": 50000,
    "normalized_precision": 0.34450400849995166,
    "normalized_recall": 0.013622216540966767,
    "selection_accuracy": 0.818,
    "solvable_selection_accuracy": 0.8296146044624746
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.77898,
    "precision": 0.7800232567464613,
    "recall": 0.9979735795818905,
    "f1": 0.8756400301587837,
    "true_positives": 38906,
    "true_negatives": 43,
    "false_positives": 10972,
    "false_negatives": 79,
    "total": 50000,
    "normalized_precision": 0.7910656027083798,
    "normalized_recall": 0.9960504255092193,
    "selection_accuracy": 0.842,
    "solvable_selection_accuracy": 0.8539553752535497
  },
  "GPM_scores": {
    "accuracy": 0.23322,
    "precision": 0.7873665480427047,
    "recall": 0.022701038861100423,
    "f1": 0.04412974644094842,
    "true_positives": 885,
    "true_negatives": 10776,
    "false_positives": 239,
    "false_negatives": 38100,
    "total": 50000,
    "normalized_precision": 0.7100743745774172,
    "normalized_recall": 0.02445965651710675,
    "selection_accuracy": 0.794,
    "solvable_selection_accuracy": 0.8052738336713996
  },
  "GRMGemma_scores": {
    "accuracy": 0.6358,
    "precision": 0.7899673394188091,
    "recall": 0.7258945748364756,
    "f1": 0.756576836701957,
    "true_positives": 28299,
    "true_negatives": 3491,
    "false_positives": 7524,
    "false_negatives": 10686,
    "total": 50000,
    "normalized_precision": 0.7943324435860832,
    "normalized_recall": 0.7079626957327451,
    "selection_accuracy": 0.784,
    "solvable_selection_accuracy": 0.795131845841785
  },
  "GRMLlama32_scores": {
    "accuracy": 0.63778,
    "precision": 0.7940331304935767,
    "recall": 0.7229703732204694,
    "f1": 0.7568373142143635,
    "true_positives": 28185,
    "true_negatives": 3704,
    "false_positives": 7311,
    "false_negatives": 10800,
    "total": 50000,
    "normalized_precision": 0.7967432842070636,
    "normalized_recall": 0.7034754891275338,
    "selection_accuracy": 0.818,
    "solvable_selection_accuracy": 0.8296146044624746
  },
  "GRM_scores": {
    "accuracy": 0.4845,
    "precision": 0.8732060119787547,
    "recall": 0.3964088752084135,
    "f1": 0.5452781257167052,
    "true_positives": 15454,
    "true_negatives": 8771,
    "false_positives": 2244,
    "false_negatives": 23531,
    "total": 50000,
    "normalized_precision": 0.6835025816033925,
    "normalized_recall": 0.3610781494590972,
    "selection_accuracy": 0.802,
    "solvable_selection_accuracy": 0.8133874239350912
  },
  "INFORM_scores": {
    "accuracy": 0.63636,
    "precision": 0.8214009826035905,
    "recall": 0.6818776452481724,
    "f1": 0.7451645456074452,
    "true_positives": 26583,
    "true_negatives": 5235,
    "false_positives": 5780,
    "false_negatives": 12402,
    "total": 50000,
    "normalized_precision": 0.8015419410871341,
    "normalized_recall": 0.6478291843298004,
    "selection_accuracy": 0.822,
    "solvable_selection_accuracy": 0.8336713995943205
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.51588,
    "precision": 0.9431484257871064,
    "recall": 0.40341156855200716,
    "f1": 0.5651095939633489,
    "true_positives": 15727,
    "true_negatives": 10067,
    "false_positives": 948,
    "false_negatives": 23258,
    "total": 50000,
    "normalized_precision": 0.7339773305714188,
    "normalized_recall": 0.338734096866303,
    "selection_accuracy": 0.78,
    "solvable_selection_accuracy": 0.7910750507099391
  },
  "InternLM2RewardModel_scores": {
    "accuracy": 0.61778,
    "precision": 0.846140448655427,
    "recall": 0.6230858022316276,
    "f1": 0.7176812964412864,
    "true_positives": 24291,
    "true_negatives": 6598,
    "false_positives": 4417,
    "false_negatives": 14694,
    "total": 50000,
    "normalized_precision": 0.8035212446381212,
    "normalized_recall": 0.5821469469064281,
    "selection_accuracy": 0.786,
    "solvable_selection_accuracy": 0.7971602434077079
  },
  "LDLRewardGemma_scores": {
    "accuracy": 0.60448,
    "precision": 0.7467690963747078,
    "recall": 0.7455431576247274,
    "f1": 0.7461556234436371,
    "true_positives": 29065,
    "true_negatives": 1159,
    "false_positives": 9856,
    "false_negatives": 9920,
    "total": 50000,
    "normalized_precision": 0.790323114120899,
    "normalized_recall": 0.77738821303112,
    "selection_accuracy": 0.756,
    "solvable_selection_accuracy": 0.7667342799188641
  },
  "OffsetBias_scores": {
    "accuracy": 0.67768,
    "precision": 0.7981538942921957,
    "recall": 0.7851737847890214,
    "f1": 0.791610634116065,
    "true_positives": 30610,
    "true_negatives": 3274,
    "false_positives": 7741,
    "false_negatives": 8375,
    "total": 50000,
    "normalized_precision": 0.7973906890570229,
    "normalized_recall": 0.760154001921718,
    "selection_accuracy": 0.784,
    "solvable_selection_accuracy": 0.795131845841785
  },
  "QRMGemma_scores": {
    "accuracy": 0.6717,
    "precision": 0.7888109740492399,
    "recall": 0.7906117737591382,
    "f1": 0.7897103473014003,
    "true_positives": 30822,
    "true_negatives": 2763,
    "false_positives": 8252,
    "false_negatives": 8163,
    "total": 50000,
    "normalized_precision": 0.794114321664759,
    "normalized_recall": 0.7718812392918687,
    "selection_accuracy": 0.808,
    "solvable_selection_accuracy": 0.8194726166328601
  },
  "QRM_scores": {
    "accuracy": 0.6727,
    "precision": 0.7906558388157895,
    "recall": 0.7891753238425036,
    "f1": 0.7899148876079952,
    "true_positives": 30766,
    "true_negatives": 2869,
    "false_positives": 8146,
    "false_negatives": 8219,
    "total": 50000,
    "normalized_precision": 0.7940120947793777,
    "normalized_recall": 0.7675094248405023,
    "selection_accuracy": 0.806,
    "solvable_selection_accuracy": 0.8174442190669371
  },
  "Qwen72B_scores": {
    "accuracy": 0.794,
    "precision": 0.8697568898399031,
    "recall": 0.8653841220982429,
    "f1": 0.867564996014092,
    "true_positives": 33737,
    "true_negatives": 5963,
    "false_positives": 5052,
    "false_negatives": 5248,
    "total": 50000,
    "normalized_precision": 0.8319909698787374,
    "normalized_recall": 0.8194374949707556,
    "selection_accuracy": 0.852,
    "solvable_selection_accuracy": 0.8640973630831643
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.77982,
    "precision": 0.7797935752290275,
    "recall": 1.0,
    "f1": 0.8762741770530125,
    "true_positives": 38985,
    "true_negatives": 6,
    "false_positives": 11009,
    "false_negatives": 0,
    "total": 50000,
    "normalized_precision": 0.7908173007970172,
    "normalized_recall": 1.0,
    "selection_accuracy": 0.85,
    "solvable_selection_accuracy": 0.8620689655172413
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.77972,
    "precision": 0.7797155943118862,
    "recall": 1.0,
    "f1": 0.876224939314933,
    "true_positives": 38985,
    "true_negatives": 1,
    "false_positives": 11014,
    "false_negatives": 0,
    "total": 50000,
    "normalized_precision": 0.7907890261642802,
    "normalized_recall": 1.0,
    "selection_accuracy": 0.786,
    "solvable_selection_accuracy": 0.7971602434077079
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.80426,
    "precision": 0.9362598613435333,
    "recall": 0.8036680774656919,
    "f1": 0.86491186903891,
    "true_positives": 31331,
    "true_negatives": 8882,
    "false_positives": 2133,
    "false_negatives": 7654,
    "total": 50000,
    "normalized_precision": 0.833499810619196,
    "normalized_recall": 0.7133918603019125,
    "selection_accuracy": 0.838,
    "solvable_selection_accuracy": 0.8498985801217038
  },
  "SkyworksGemma_scores": {
    "accuracy": 0.6355,
    "precision": 0.7998786618131392,
    "recall": 0.7101962293189689,
    "f1": 0.7523743529124036,
    "true_positives": 27687,
    "true_negatives": 4088,
    "false_positives": 6927,
    "false_negatives": 11298,
    "total": 50000,
    "normalized_precision": 0.80113651771004,
    "normalized_recall": 0.6887278324952171,
    "selection_accuracy": 0.822,
    "solvable_selection_accuracy": 0.8336713995943205
  },
  "Skyworks_scores": {
    "accuracy": 0.61292,
    "precision": 0.8524543071564509,
    "recall": 0.6089521610875978,
    "f1": 0.71041685369722,
    "true_positives": 23740,
    "true_negatives": 6906,
    "false_positives": 4109,
    "false_negatives": 15245,
    "total": 50000,
    "normalized_precision": 0.8009246201071385,
    "normalized_recall": 0.5657255925979724,
    "selection_accuracy": 0.81,
    "solvable_selection_accuracy": 0.821501014198783
  },
  "URM_scores": {
    "accuracy": 0.69912,
    "precision": 0.7821567472009429,
    "recall": 0.8511735282801077,
    "f1": 0.8152069770298489,
    "true_positives": 33183,
    "true_negatives": 1773,
    "false_positives": 9242,
    "false_negatives": 5802,
    "total": 50000,
    "normalized_precision": 0.7922199598579228,
    "normalized_recall": 0.8493337357586731,
    "selection_accuracy": 0.77,
    "solvable_selection_accuracy": 0.7809330628803245
  }
}