{
  "ArmorRM_scores": {
    "accuracy": 0.68222,
    "precision": 0.7069178840435,
    "recall": 0.9317102477541912,
    "f1": 0.8038951902546191,
    "true_positives": 32567,
    "true_negatives": 1544,
    "false_positives": 13502,
    "false_negatives": 2387,
    "total": 50000,
    "normalized_precision": 0.7624534819906641,
    "normalized_recall": 0.9231578815475739,
    "selection_accuracy": 0.688,
    "solvable_selection_accuracy": 0.7478260869565218
  },
  "DecisionTreeReward27B_scores": {
    "accuracy": 0.53854,
    "precision": 0.7054327904001106,
    "recall": 0.5835955827659209,
    "f1": 0.6387562430524025,
    "true_positives": 20399,
    "true_negatives": 6528,
    "false_positives": 8518,
    "false_negatives": 14555,
    "total": 50000,
    "normalized_precision": 0.7652380221321637,
    "normalized_recall": 0.575313211775705,
    "selection_accuracy": 0.732,
    "solvable_selection_accuracy": 0.7956521739130434
  },
  "DecisionTreeReward8B_scores": {
    "accuracy": 0.54228,
    "precision": 0.7212201202522364,
    "recall": 0.5627968186759741,
    "f1": 0.6322352563072472,
    "true_positives": 19672,
    "true_negatives": 7442,
    "false_positives": 7604,
    "false_negatives": 15282,
    "total": 50000,
    "normalized_precision": 0.7706805828921954,
    "normalized_recall": 0.5489920442967434,
    "selection_accuracy": 0.74,
    "solvable_selection_accuracy": 0.8043478260869565
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.69542,
    "precision": 0.7013289240002449,
    "recall": 0.9828918006522859,
    "f1": 0.8185749514539974,
    "true_positives": 34356,
    "true_negatives": 415,
    "false_positives": 14631,
    "false_negatives": 598,
    "total": 50000,
    "normalized_precision": 0.7604610274853604,
    "normalized_recall": 0.9809309458139193,
    "selection_accuracy": 0.694,
    "solvable_selection_accuracy": 0.7543478260869565
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.30312,
    "precision": 0.8353658536585366,
    "recall": 0.003919436974309092,
    "f1": 0.007802266643886327,
    "true_positives": 137,
    "true_negatives": 15019,
    "false_positives": 27,
    "false_negatives": 34817,
    "total": 50000,
    "normalized_precision": 0.1532608695652174,
    "normalized_recall": 0.003999087982971305,
    "selection_accuracy": 0.692,
    "solvable_selection_accuracy": 0.7521739130434782
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.69898,
    "precision": 0.699049904990499,
    "recall": 0.9998569548549522,
    "f1": 0.8228231056280826,
    "true_positives": 34949,
    "true_negatives": 0,
    "false_positives": 15046,
    "false_negatives": 5,
    "total": 50000,
    "normalized_precision": 0.7598684672815104,
    "normalized_recall": 0.9998901789635486,
    "selection_accuracy": 0.708,
    "solvable_selection_accuracy": 0.7695652173913043
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.69954,
    "precision": 0.6996414047318549,
    "recall": 0.9991417291297133,
    "f1": 0.8229901851044527,
    "true_positives": 34924,
    "true_negatives": 53,
    "false_positives": 14993,
    "false_negatives": 30,
    "total": 50000,
    "normalized_precision": 0.7601082997930801,
    "normalized_recall": 0.9990929628866602,
    "selection_accuracy": 0.742,
    "solvable_selection_accuracy": 0.8065217391304348
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.30118,
    "precision": 0.782608695652174,
    "recall": 0.0005149625221719975,
    "f1": 0.0010292477914057808,
    "true_positives": 18,
    "true_negatives": 15041,
    "false_positives": 5,
    "false_negatives": 34936,
    "total": 50000,
    "normalized_precision": 0.02608695652173913,
    "normalized_recall": 0.0004204621150128258,
    "selection_accuracy": 0.722,
    "solvable_selection_accuracy": 0.7847826086956522
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.69914,
    "precision": 0.6991458462523255,
    "recall": 0.9999141729129714,
    "f1": 0.8229089410795222,
    "true_positives": 34951,
    "true_negatives": 6,
    "false_positives": 15040,
    "false_negatives": 3,
    "total": 50000,
    "normalized_precision": 0.759953026593194,
    "normalized_recall": 0.9999338768115943,
    "selection_accuracy": 0.734,
    "solvable_selection_accuracy": 0.7978260869565217
  },
  "GPM_scores": {
    "accuracy": 0.30966,
    "precision": 0.6904969485614647,
    "recall": 0.022658350975567888,
    "f1": 0.04387690091687211,
    "true_positives": 792,
    "true_negatives": 14691,
    "false_positives": 355,
    "false_negatives": 34162,
    "total": 50000,
    "normalized_precision": 0.6757125603864735,
    "normalized_recall": 0.02178840673524681,
    "selection_accuracy": 0.696,
    "solvable_selection_accuracy": 0.7565217391304347
  },
  "GRMGemma_scores": {
    "accuracy": 0.50362,
    "precision": 0.7228746096670625,
    "recall": 0.4702180008010528,
    "f1": 0.5697942486696366,
    "true_positives": 16436,
    "true_negatives": 8745,
    "false_positives": 6301,
    "false_negatives": 18518,
    "total": 50000,
    "normalized_precision": 0.7697895927500983,
    "normalized_recall": 0.4602754972253228,
    "selection_accuracy": 0.73,
    "solvable_selection_accuracy": 0.7934782608695652
  },
  "GRMLlama32_scores": {
    "accuracy": 0.48834,
    "precision": 0.7255789321650378,
    "recall": 0.4311666762030097,
    "f1": 0.5409062359802602,
    "true_positives": 15071,
    "true_negatives": 9346,
    "false_positives": 5700,
    "false_negatives": 19883,
    "total": 50000,
    "normalized_precision": 0.7582537618097375,
    "normalized_recall": 0.42525896338827945,
    "selection_accuracy": 0.73,
    "solvable_selection_accuracy": 0.7934782608695652
  },
  "GRM_scores": {
    "accuracy": 0.436,
    "precision": 0.8257765772718503,
    "recall": 0.24489328832179436,
    "f1": 0.37775816416593117,
    "true_positives": 8560,
    "true_negatives": 13240,
    "false_positives": 1806,
    "false_negatives": 26394,
    "total": 50000,
    "normalized_precision": 0.6884151404418334,
    "normalized_recall": 0.2220116687546561,
    "selection_accuracy": 0.732,
    "solvable_selection_accuracy": 0.7956521739130434
  },
  "INFORM_scores": {
    "accuracy": 0.53096,
    "precision": 0.751662874146683,
    "recall": 0.49141729129713335,
    "f1": 0.5942981697401654,
    "true_positives": 17177,
    "true_negatives": 9371,
    "false_positives": 5675,
    "false_negatives": 17777,
    "total": 50000,
    "normalized_precision": 0.7842124935757603,
    "normalized_recall": 0.4708799151671158,
    "selection_accuracy": 0.756,
    "solvable_selection_accuracy": 0.8217391304347826
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.4719,
    "precision": 0.7713451406081381,
    "recall": 0.34762831149510787,
    "f1": 0.4792632180961209,
    "true_positives": 12151,
    "true_negatives": 11444,
    "false_positives": 3602,
    "false_negatives": 22803,
    "total": 50000,
    "normalized_precision": 0.7651043198046924,
    "normalized_recall": 0.32040597109381913,
    "selection_accuracy": 0.726,
    "solvable_selection_accuracy": 0.7891304347826087
  },
  "InternLM2RewardModel_scores": {
    "accuracy": 0.4932,
    "precision": 0.7568939717828131,
    "recall": 0.40518967786233334,
    "f1": 0.5278202213692095,
    "true_positives": 14163,
    "true_negatives": 10497,
    "false_positives": 4549,
    "false_negatives": 20791,
    "total": 50000,
    "normalized_precision": 0.78516137359449,
    "normalized_recall": 0.37649568597292854,
    "selection_accuracy": 0.736,
    "solvable_selection_accuracy": 0.8
  },
  "LDLRewardGemma_scores": {
    "accuracy": 0.5355,
    "precision": 0.7139001349527665,
    "recall": 0.5599645248040281,
    "f1": 0.6276314312741498,
    "true_positives": 19573,
    "true_negatives": 7202,
    "false_positives": 7844,
    "false_negatives": 15381,
    "total": 50000,
    "normalized_precision": 0.7644836996859546,
    "normalized_recall": 0.5476927516657517,
    "selection_accuracy": 0.736,
    "solvable_selection_accuracy": 0.8
  },
  "OffsetBias_scores": {
    "accuracy": 0.55506,
    "precision": 0.7352886716290783,
    "recall": 0.5680322709847228,
    "f1": 0.6409283858159691,
    "true_positives": 19855,
    "true_negatives": 7898,
    "false_positives": 7148,
    "false_negatives": 15099,
    "total": 50000,
    "normalized_precision": 0.7758164889891903,
    "normalized_recall": 0.5553961760152845,
    "selection_accuracy": 0.736,
    "solvable_selection_accuracy": 0.8
  },
  "QRMGemma_scores": {
    "accuracy": 0.545,
    "precision": 0.7125391849529781,
    "recall": 0.5852549064484751,
    "f1": 0.6426551897461674,
    "true_positives": 20457,
    "true_negatives": 6793,
    "false_positives": 8253,
    "false_negatives": 14497,
    "total": 50000,
    "normalized_precision": 0.7683826803577553,
    "normalized_recall": 0.5752582592709574,
    "selection_accuracy": 0.728,
    "solvable_selection_accuracy": 0.7913043478260869
  },
  "QRM_scores": {
    "accuracy": 0.54058,
    "precision": 0.708045418243689,
    "recall": 0.5833667105338445,
    "f1": 0.639687544115571,
    "true_positives": 20391,
    "true_negatives": 6638,
    "false_positives": 8408,
    "false_negatives": 14563,
    "total": 50000,
    "normalized_precision": 0.7658633136060021,
    "normalized_recall": 0.574021631748466,
    "selection_accuracy": 0.732,
    "solvable_selection_accuracy": 0.7956521739130434
  },
  "Qwen72B_scores": {
    "accuracy": 0.71662,
    "precision": 0.7526314510051778,
    "recall": 0.8857641471648452,
    "f1": 0.813788752940558,
    "true_positives": 30961,
    "true_negatives": 4870,
    "false_positives": 10176,
    "false_negatives": 3993,
    "total": 50000,
    "normalized_precision": 0.7844041650438086,
    "normalized_recall": 0.8539307649571924,
    "selection_accuracy": 0.734,
    "solvable_selection_accuracy": 0.7978260869565217
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.70002,
    "precision": 0.7000180422187919,
    "recall": 0.9989986839846655,
    "f1": 0.8232021405754564,
    "true_positives": 34919,
    "true_negatives": 82,
    "false_positives": 14964,
    "false_negatives": 35,
    "total": 50000,
    "normalized_precision": 0.7601926896423103,
    "normalized_recall": 0.9978692063186321,
    "selection_accuracy": 0.716,
    "solvable_selection_accuracy": 0.7782608695652173
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.69916,
    "precision": 0.6991438972676721,
    "recall": 0.9999713909709904,
    "f1": 0.8229269670857464,
    "true_positives": 34953,
    "true_negatives": 5,
    "false_positives": 15041,
    "false_negatives": 1,
    "total": 50000,
    "normalized_precision": 0.7598921692524174,
    "normalized_recall": 0.9999782608695652,
    "selection_accuracy": 0.708,
    "solvable_selection_accuracy": 0.7695652173913043
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.65942,
    "precision": 0.8205816074686125,
    "recall": 0.6563197345082108,
    "f1": 0.7293160178665099,
    "true_positives": 22941,
    "true_negatives": 10030,
    "false_positives": 5016,
    "false_negatives": 12013,
    "total": 50000,
    "normalized_precision": 0.7849041716638181,
    "normalized_recall": 0.5711269001718694,
    "selection_accuracy": 0.74,
    "solvable_selection_accuracy": 0.8043478260869565
  },
  "SkyworksGemma_scores": {
    "accuracy": 0.50694,
    "precision": 0.7589101694063238,
    "recall": 0.43191051095725813,
    "f1": 0.5505132459387022,
    "true_positives": 15097,
    "true_negatives": 10250,
    "false_positives": 4796,
    "false_negatives": 19857,
    "total": 50000,
    "normalized_precision": 0.7553498902863353,
    "normalized_recall": 0.41306546692747326,
    "selection_accuracy": 0.732,
    "solvable_selection_accuracy": 0.7956521739130434
  },
  "Skyworks_scores": {
    "accuracy": 0.46134,
    "precision": 0.7946513849092646,
    "recall": 0.30943525776735137,
    "f1": 0.44542365901369296,
    "true_positives": 10816,
    "true_negatives": 12251,
    "false_positives": 2795,
    "false_negatives": 24138,
    "total": 50000,
    "normalized_precision": 0.7211468471322897,
    "normalized_recall": 0.2787880340478062,
    "selection_accuracy": 0.748,
    "solvable_selection_accuracy": 0.8130434782608695
  },
  "URM_scores": {
    "accuracy": 0.6891,
    "precision": 0.7006927928859477,
    "recall": 0.9693311209017565,
    "f1": 0.8134055144102077,
    "true_positives": 33882,
    "true_negatives": 573,
    "false_positives": 14473,
    "false_negatives": 1072,
    "total": 50000,
    "normalized_precision": 0.7601459753903727,
    "normalized_recall": 0.9676400342502149,
    "selection_accuracy": 0.696,
    "solvable_selection_accuracy": 0.7565217391304347
  }
}