{
  "ArmorRM_scores": {
    "accuracy": 0.5122600619195047,
    "precision": 0.31847308929897417,
    "recall": 0.6325449281695527,
    "f1": 0.42364820370234874,
    "true_positives": 11580,
    "true_negatives": 21512,
    "false_positives": 24781,
    "false_negatives": 6727,
    "total": 64600,
    "normalized_precision": 0.3231431737702153,
    "normalized_recall": 0.6003801074533566,
    "selection_accuracy": 0.33436532507739936,
    "solvable_selection_accuracy": 0.35121951219512193
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.2843167701863354,
    "precision": 0.284249926227344,
    "recall": 0.9997268804282515,
    "f1": 0.4426439644956103,
    "true_positives": 18302,
    "true_negatives": 8,
    "false_positives": 46085,
    "false_negatives": 5,
    "total": 64400,
    "normalized_precision": 0.29768548926823296,
    "normalized_recall": 0.9998633877103178,
    "selection_accuracy": 0.2639751552795031,
    "solvable_selection_accuracy": 0.2764227642276423
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.28468944099378884,
    "precision": 0.28428888923426016,
    "recall": 0.9991806412847545,
    "f1": 0.442637628554144,
    "true_positives": 18292,
    "true_negatives": 42,
    "false_positives": 46051,
    "false_negatives": 15,
    "total": 64400,
    "normalized_precision": 0.2977741012000508,
    "normalized_recall": 0.9995765424631768,
    "selection_accuracy": 0.265527950310559,
    "solvable_selection_accuracy": 0.2780487804878049
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.2848447204968944,
    "precision": 0.28434648802399864,
    "recall": 0.9992898891134538,
    "f1": 0.4427181646580514,
    "true_positives": 18294,
    "true_negatives": 50,
    "false_positives": 46043,
    "false_negatives": 13,
    "total": 64400,
    "normalized_precision": 0.2977188062618217,
    "normalized_recall": 0.9993688854155114,
    "selection_accuracy": 0.3167701863354037,
    "solvable_selection_accuracy": 0.33170731707317075
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.28433229813664596,
    "precision": 0.2842610424302665,
    "recall": 0.9997815043426012,
    "f1": 0.44266279702521305,
    "true_positives": 18303,
    "true_negatives": 8,
    "false_positives": 46085,
    "false_negatives": 4,
    "total": 64400,
    "normalized_precision": 0.2977125023893228,
    "normalized_recall": 0.9999194572365303,
    "selection_accuracy": 0.30745341614906835,
    "solvable_selection_accuracy": 0.32195121951219513
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.28437888198757766,
    "precision": 0.28427428748932204,
    "recall": 0.9997815043426012,
    "f1": 0.442678856479466,
    "true_positives": 18303,
    "true_negatives": 11,
    "false_positives": 46082,
    "false_negatives": 4,
    "total": 64400,
    "normalized_precision": 0.29771384315721405,
    "normalized_recall": 0.9999194572365303,
    "selection_accuracy": 0.281055900621118,
    "solvable_selection_accuracy": 0.2943089430894309
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.2846583850931677,
    "precision": 0.28431357314893946,
    "recall": 0.999453760856503,
    "f1": 0.4426943456485446,
    "true_positives": 18297,
    "true_negatives": 35,
    "false_positives": 46058,
    "false_negatives": 10,
    "total": 64400,
    "normalized_precision": 0.29770801259651447,
    "normalized_recall": 0.9995401017641877,
    "selection_accuracy": 0.32608695652173914,
    "solvable_selection_accuracy": 0.34146341463414637
  },
  "GPM_scores": {
    "accuracy": 0.7078018575851394,
    "precision": 0.2722177742193755,
    "recall": 0.018572130878898783,
    "f1": 0.03477193700143178,
    "true_positives": 340,
    "true_negatives": 45384,
    "false_positives": 909,
    "false_negatives": 17967,
    "total": 64600,
    "normalized_precision": 0.24240030971738288,
    "normalized_recall": 0.017392898541479188,
    "selection_accuracy": 0.2693498452012384,
    "solvable_selection_accuracy": 0.28292682926829266
  },
  "GRMGemma_scores": {
    "accuracy": 0.5973219814241486,
    "precision": 0.32866417644966206,
    "recall": 0.4037253509586497,
    "f1": 0.362348327000858,
    "true_positives": 7391,
    "true_negatives": 31196,
    "false_positives": 15097,
    "false_negatives": 10916,
    "total": 64600,
    "normalized_precision": 0.31694786828623955,
    "normalized_recall": 0.3696396852507015,
    "selection_accuracy": 0.3188854489164087,
    "solvable_selection_accuracy": 0.33495934959349594
  },
  "GRMLlama32_scores": {
    "accuracy": 0.5832198142414861,
    "precision": 0.32784592639948856,
    "recall": 0.4481892172393074,
    "f1": 0.37868648174643466,
    "true_positives": 8205,
    "true_negatives": 29471,
    "false_positives": 16822,
    "false_negatives": 10102,
    "total": 64600,
    "normalized_precision": 0.31954679827547317,
    "normalized_recall": 0.40746013063916364,
    "selection_accuracy": 0.34210526315789475,
    "solvable_selection_accuracy": 0.359349593495935
  },
  "GRM_scores": {
    "accuracy": 0.6558049535603715,
    "precision": 0.36769064942064134,
    "recall": 0.29813732452067515,
    "f1": 0.32928116798889934,
    "true_positives": 5458,
    "true_negatives": 36907,
    "false_positives": 9386,
    "false_negatives": 12849,
    "total": 64600,
    "normalized_precision": 0.3170831239773807,
    "normalized_recall": 0.22908361035842917,
    "selection_accuracy": 0.3173374613003096,
    "solvable_selection_accuracy": 0.3333333333333333
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.6597832817337461,
    "precision": 0.36478821362799263,
    "recall": 0.2704976238597258,
    "f1": 0.3106455053007967,
    "true_positives": 4952,
    "true_negatives": 37670,
    "false_positives": 8623,
    "false_negatives": 13355,
    "total": 64600,
    "normalized_precision": 0.3217374325441206,
    "normalized_recall": 0.2141056132707613,
    "selection_accuracy": 0.34365325077399383,
    "solvable_selection_accuracy": 0.36097560975609755
  },
  "OffsetBias_scores": {
    "accuracy": 0.4977863777089783,
    "precision": 0.3091792656587473,
    "recall": 0.6255530671327907,
    "f1": 0.4138255009304931,
    "true_positives": 11452,
    "true_negatives": 20705,
    "false_positives": 25588,
    "false_negatives": 6855,
    "total": 64600,
    "normalized_precision": 0.32033907115105054,
    "normalized_recall": 0.6204621527486672,
    "selection_accuracy": 0.326625386996904,
    "solvable_selection_accuracy": 0.34308943089430893
  },
  "QRM_scores": {
    "accuracy": 0.48891640866873065,
    "precision": 0.3119871155763479,
    "recall": 0.6666302507237669,
    "f1": 0.4250487601003065,
    "true_positives": 12204,
    "true_negatives": 19380,
    "false_positives": 26913,
    "false_negatives": 6103,
    "total": 64600,
    "normalized_precision": 0.3210864051699733,
    "normalized_recall": 0.6507949713755843,
    "selection_accuracy": 0.3684210526315789,
    "solvable_selection_accuracy": 0.38699186991869916
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.2900928792569659,
    "precision": 0.27690239834172725,
    "recall": 0.9340143114655596,
    "f1": 0.4271653051537635,
    "true_positives": 17099,
    "true_negatives": 1641,
    "false_positives": 44652,
    "false_negatives": 1208,
    "total": 64600,
    "normalized_precision": 0.2967920798070191,
    "normalized_recall": 0.9518716115893637,
    "selection_accuracy": 0.34210526315789475,
    "solvable_selection_accuracy": 0.359349593495935
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.28738390092879257,
    "precision": 0.2767615612520933,
    "recall": 0.9388758398426831,
    "f1": 0.4275036997425725,
    "true_positives": 17188,
    "true_negatives": 1377,
    "false_positives": 44916,
    "false_negatives": 1119,
    "total": 64600,
    "normalized_precision": 0.2974897898414246,
    "normalized_recall": 0.9640046151793047,
    "selection_accuracy": 0.28637770897832815,
    "solvable_selection_accuracy": 0.3008130081300813
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.6148297213622291,
    "precision": 0.34135984172175843,
    "recall": 0.38640957010979404,
    "f1": 0.3624903920061491,
    "true_positives": 7074,
    "true_negatives": 32644,
    "false_positives": 13649,
    "false_negatives": 11233,
    "total": 64600,
    "normalized_precision": 0.31730681592709614,
    "normalized_recall": 0.2878254780267319,
    "selection_accuracy": 0.33126934984520123,
    "solvable_selection_accuracy": 0.34796747967479674
  },
  "Skyworks_scores": {
    "accuracy": 0.6236996904024767,
    "precision": 0.3485872855701312,
    "recall": 0.3773966242420932,
    "f1": 0.3624203320481548,
    "true_positives": 6909,
    "true_negatives": 33382,
    "false_positives": 12911,
    "false_negatives": 11398,
    "total": 64600,
    "normalized_precision": 0.32874052351705896,
    "normalized_recall": 0.31168174635208623,
    "selection_accuracy": 0.33436532507739936,
    "solvable_selection_accuracy": 0.35121951219512193
  },
  "URM_scores": {
    "accuracy": 0.5813777089783282,
    "precision": 0.3316633266533066,
    "recall": 0.470093406893538,
    "f1": 0.38892780476782285,
    "true_positives": 8606,
    "true_negatives": 28951,
    "false_positives": 17342,
    "false_negatives": 9701,
    "total": 64600,
    "normalized_precision": 0.3265027562809124,
    "normalized_recall": 0.4108814120567664,
    "selection_accuracy": 0.3715170278637771,
    "solvable_selection_accuracy": 0.3902439024390244
  }
}