{
  "ArmorRM_scores": {
    "accuracy": 0.7342222222222222,
    "precision": 0.1226172076249356,
    "recall": 0.25674217907227614,
    "f1": 0.16596931659693165,
    "true_positives": 238,
    "true_negatives": 6370,
    "false_positives": 1703,
    "false_negatives": 689,
    "total": 9000,
    "normalized_precision": 0.2268101818051698,
    "normalized_recall": 0.3019872485773915,
    "selection_accuracy": 0.15555555555555556,
    "solvable_selection_accuracy": 0.2978723404255319
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.202,
    "precision": 0.10734463276836158,
    "recall": 0.9223300970873787,
    "f1": 0.19230769230769232,
    "true_positives": 855,
    "true_negatives": 963,
    "false_positives": 7110,
    "false_negatives": 72,
    "total": 9000,
    "normalized_precision": 0.20328629602493378,
    "normalized_recall": 0.8986022049581894,
    "selection_accuracy": 0.1,
    "solvable_selection_accuracy": 0.19148936170212766
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.8835555555555555,
    "precision": 0.19900497512437812,
    "recall": 0.043149946062567425,
    "f1": 0.07092198581560284,
    "true_positives": 40,
    "true_negatives": 7912,
    "false_positives": 161,
    "false_negatives": 887,
    "total": 9000,
    "normalized_precision": 0.2099451966473243,
    "normalized_recall": 0.04208517828366296,
    "selection_accuracy": 0.15555555555555556,
    "solvable_selection_accuracy": 0.2978723404255319
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.10388888888888889,
    "precision": 0.10300333704115684,
    "recall": 0.9989212513484358,
    "f1": 0.18675002520923664,
    "true_positives": 926,
    "true_negatives": 9,
    "false_positives": 8064,
    "false_negatives": 1,
    "total": 9000,
    "normalized_precision": 0.1975129935920209,
    "normalized_recall": 0.9997405293201868,
    "selection_accuracy": 0.16666666666666666,
    "solvable_selection_accuracy": 0.3191489361702128
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.12766666666666668,
    "precision": 0.10407136322049405,
    "recall": 0.9816612729234089,
    "f1": 0.1881915003619067,
    "true_positives": 910,
    "true_negatives": 239,
    "false_positives": 7834,
    "false_negatives": 17,
    "total": 9000,
    "normalized_precision": 0.19845770728735332,
    "normalized_recall": 0.9856698109147857,
    "selection_accuracy": 0.12222222222222222,
    "solvable_selection_accuracy": 0.23404255319148937
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.8965555555555556,
    "precision": 0.3333333333333333,
    "recall": 0.004314994606256742,
    "f1": 0.008519701810436634,
    "true_positives": 4,
    "true_negatives": 8065,
    "false_positives": 8,
    "false_negatives": 923,
    "total": 9000,
    "normalized_precision": 0.07446808510638298,
    "normalized_recall": 0.0017117972814373248,
    "selection_accuracy": 0.13333333333333333,
    "solvable_selection_accuracy": 0.2553191489361702
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.10566666666666667,
    "precision": 0.10301003344481606,
    "recall": 0.9967637540453075,
    "f1": 0.18672324946953625,
    "true_positives": 924,
    "true_negatives": 27,
    "false_positives": 8046,
    "false_negatives": 3,
    "total": 9000,
    "normalized_precision": 0.19754472714395002,
    "normalized_recall": 0.9987129301698044,
    "selection_accuracy": 0.16666666666666666,
    "solvable_selection_accuracy": 0.3191489361702128
  },
  "GPM_scores": {
    "accuracy": 0.8748888888888889,
    "precision": 0.08713692946058091,
    "recall": 0.022653721682847898,
    "f1": 0.03595890410958904,
    "true_positives": 21,
    "true_negatives": 7853,
    "false_positives": 220,
    "false_negatives": 906,
    "total": 9000,
    "normalized_precision": 0.1367274569402229,
    "normalized_recall": 0.05332509999438726,
    "selection_accuracy": 0.1,
    "solvable_selection_accuracy": 0.19148936170212766
  },
  "GRMGemma_scores": {
    "accuracy": 0.8208888888888889,
    "precision": 0.12154696132596685,
    "recall": 0.1186623516720604,
    "f1": 0.12008733624454147,
    "true_positives": 110,
    "true_negatives": 7278,
    "false_positives": 795,
    "false_negatives": 817,
    "total": 9000,
    "normalized_precision": 0.21541292503364654,
    "normalized_recall": 0.17604157076575283,
    "selection_accuracy": 0.12222222222222222,
    "solvable_selection_accuracy": 0.23404255319148937
  },
  "GRMLlama32_scores": {
    "accuracy": 0.8363333333333334,
    "precision": 0.12292817679558012,
    "recall": 0.09600862998921252,
    "f1": 0.10781344639612356,
    "true_positives": 89,
    "true_negatives": 7438,
    "false_positives": 635,
    "false_negatives": 838,
    "total": 9000,
    "normalized_precision": 0.22943137379933373,
    "normalized_recall": 0.11458763381721289,
    "selection_accuracy": 0.16666666666666666,
    "solvable_selection_accuracy": 0.3191489361702128
  },
  "GRM_scores": {
    "accuracy": 0.856,
    "precision": 0.11958762886597939,
    "recall": 0.06256742179072276,
    "f1": 0.0821529745042493,
    "true_positives": 58,
    "true_negatives": 7646,
    "false_positives": 427,
    "false_negatives": 869,
    "total": 9000,
    "normalized_precision": 0.23677581283964264,
    "normalized_recall": 0.0864679443584123,
    "selection_accuracy": 0.1111111111111111,
    "solvable_selection_accuracy": 0.2127659574468085
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.8741111111111111,
    "precision": 0.1611842105263158,
    "recall": 0.05285868392664509,
    "f1": 0.07961007311129165,
    "true_positives": 49,
    "true_negatives": 7818,
    "false_positives": 255,
    "false_negatives": 878,
    "total": 9000,
    "normalized_precision": 0.2276258020938872,
    "normalized_recall": 0.06918871114314802,
    "selection_accuracy": 0.14444444444444443,
    "solvable_selection_accuracy": 0.2765957446808511
  },
  "OffsetBias_scores": {
    "accuracy": 0.7326666666666667,
    "precision": 0.11017395888244597,
    "recall": 0.22545846817691478,
    "f1": 0.14801699716713881,
    "true_positives": 209,
    "true_negatives": 6385,
    "false_positives": 1688,
    "false_negatives": 718,
    "total": 9000,
    "normalized_precision": 0.20711201658210157,
    "normalized_recall": 0.2671990377860619,
    "selection_accuracy": 0.07777777777777778,
    "solvable_selection_accuracy": 0.14893617021276595
  },
  "QRM_scores": {
    "accuracy": 0.7574444444444445,
    "precision": 0.1147239263803681,
    "recall": 0.2017259978425027,
    "f1": 0.1462651544779038,
    "true_positives": 187,
    "true_negatives": 6630,
    "false_positives": 1443,
    "false_negatives": 740,
    "total": 9000,
    "normalized_precision": 0.20973341850417226,
    "normalized_recall": 0.23859913479387962,
    "selection_accuracy": 0.14444444444444443,
    "solvable_selection_accuracy": 0.2765957446808511
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.13877777777777778,
    "precision": 0.10371660859465738,
    "recall": 0.9633225458468176,
    "f1": 0.18727063017720458,
    "true_positives": 893,
    "true_negatives": 356,
    "false_positives": 7717,
    "false_negatives": 34,
    "total": 9000,
    "normalized_precision": 0.19927577771520757,
    "normalized_recall": 0.9599930080523581,
    "selection_accuracy": 0.18888888888888888,
    "solvable_selection_accuracy": 0.3617021276595745
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.10333333333333333,
    "precision": 0.10303434478159386,
    "recall": 1.0,
    "f1": 0.186819830713422,
    "true_positives": 927,
    "true_negatives": 3,
    "false_positives": 8070,
    "false_negatives": 0,
    "total": 9000,
    "normalized_precision": 0.1972383408553621,
    "normalized_recall": 1.0,
    "selection_accuracy": 0.1111111111111111,
    "solvable_selection_accuracy": 0.2127659574468085
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.8888888888888888,
    "precision": 0.4041994750656168,
    "recall": 0.16612729234088458,
    "f1": 0.23547400611620797,
    "true_positives": 154,
    "true_negatives": 7846,
    "false_positives": 227,
    "false_negatives": 773,
    "total": 9000,
    "normalized_precision": 0.20725844531598406,
    "normalized_recall": 0.10509687755754528,
    "selection_accuracy": 0.15555555555555556,
    "solvable_selection_accuracy": 0.2978723404255319
  },
  "Skyworks_scores": {
    "accuracy": 0.8248888888888889,
    "precision": 0.12311265969802555,
    "recall": 0.11434735706580366,
    "f1": 0.11856823266219238,
    "true_positives": 106,
    "true_negatives": 7318,
    "false_positives": 755,
    "false_negatives": 821,
    "total": 9000,
    "normalized_precision": 0.23128275233538395,
    "normalized_recall": 0.13026494340243414,
    "selection_accuracy": 0.12222222222222222,
    "solvable_selection_accuracy": 0.23404255319148937
  },
  "URM_scores": {
    "accuracy": 0.8264444444444444,
    "precision": 0.1295215869311552,
    "recall": 0.11974110032362459,
    "f1": 0.12443946188340807,
    "true_positives": 111,
    "true_negatives": 7327,
    "false_positives": 746,
    "false_negatives": 816,
    "total": 9000,
    "normalized_precision": 0.23317325793529595,
    "normalized_recall": 0.14426795597137407,
    "selection_accuracy": 0.1111111111111111,
    "solvable_selection_accuracy": 0.2127659574468085
  }
}