{
  "ArmorRM_scores": {
    "accuracy": 0.48802,
    "precision": 0.4714777995388849,
    "recall": 0.8162932440552837,
    "f1": 0.5977213797438518,
    "true_positives": 19018,
    "true_negatives": 5383,
    "false_positives": 21319,
    "false_negatives": 4280,
    "total": 50000,
    "normalized_precision": 0.48238089155838904,
    "normalized_recall": 0.7974275728412523,
    "selection_accuracy": 0.45,
    "solvable_selection_accuracy": 0.46487603305785125
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.46958,
    "precision": 0.4673633473074508,
    "recall": 0.9905142072280883,
    "f1": 0.6350739594083247,
    "true_positives": 23077,
    "true_negatives": 402,
    "false_positives": 26300,
    "false_negatives": 221,
    "total": 50000,
    "normalized_precision": 0.48264099952851397,
    "normalized_recall": 0.9901132373068169,
    "selection_accuracy": 0.438,
    "solvable_selection_accuracy": 0.4524793388429752
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.50882,
    "precision": 0.3520994604738447,
    "recall": 0.06442613099836896,
    "f1": 0.10892202750263053,
    "true_positives": 1501,
    "true_negatives": 23940,
    "false_positives": 2762,
    "false_negatives": 21797,
    "total": 50000,
    "normalized_precision": 0.4083721274885798,
    "normalized_recall": 0.08221191792028408,
    "selection_accuracy": 0.452,
    "solvable_selection_accuracy": 0.4669421487603306
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.46588,
    "precision": 0.4659077267816414,
    "recall": 0.9995278564683664,
    "f1": 0.6355622270742359,
    "true_positives": 23287,
    "true_negatives": 7,
    "false_positives": 26695,
    "false_negatives": 11,
    "total": 50000,
    "normalized_precision": 0.4813719843058686,
    "normalized_recall": 0.9996963417341403,
    "selection_accuracy": 0.512,
    "solvable_selection_accuracy": 0.5289256198347108
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.46614,
    "precision": 0.46600989167217316,
    "recall": 0.9989269465190145,
    "f1": 0.635535711847513,
    "true_positives": 23273,
    "true_negatives": 34,
    "false_positives": 26668,
    "false_negatives": 25,
    "total": 50000,
    "normalized_precision": 0.4814164459334336,
    "normalized_recall": 0.9989239731138029,
    "selection_accuracy": 0.548,
    "solvable_selection_accuracy": 0.5661157024793388
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.46796,
    "precision": 0.46680597974602156,
    "recall": 0.9971671388101983,
    "f1": 0.6359182109325816,
    "true_positives": 23232,
    "true_negatives": 166,
    "false_positives": 26536,
    "false_negatives": 66,
    "total": 50000,
    "normalized_precision": 0.48154272183584046,
    "normalized_recall": 0.9967799375960466,
    "selection_accuracy": 0.498,
    "solvable_selection_accuracy": 0.5144628099173554
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.46582,
    "precision": 0.46587430217900233,
    "recall": 0.9993561679114087,
    "f1": 0.635496417604913,
    "true_positives": 23283,
    "true_negatives": 8,
    "false_positives": 26694,
    "false_negatives": 15,
    "total": 50000,
    "normalized_precision": 0.4813538275315135,
    "normalized_recall": 0.9995847019263174,
    "selection_accuracy": 0.564,
    "solvable_selection_accuracy": 0.5826446280991735
  },
  "GPM_scores": {
    "accuracy": 0.53386,
    "precision": 0.49554896142433236,
    "recall": 0.021503991758949265,
    "f1": 0.041219301493274095,
    "true_positives": 501,
    "true_negatives": 26192,
    "false_positives": 510,
    "false_negatives": 22797,
    "total": 50000,
    "normalized_precision": 0.4180957300275482,
    "normalized_recall": 0.0199500734942813,
    "selection_accuracy": 0.452,
    "solvable_selection_accuracy": 0.4669421487603306
  },
  "GRMGemma_scores": {
    "accuracy": 0.53712,
    "precision": 0.5092570329406108,
    "recall": 0.18181818181818182,
    "f1": 0.2679655870445344,
    "true_positives": 4236,
    "true_negatives": 22620,
    "false_positives": 4082,
    "false_negatives": 19062,
    "total": 50000,
    "normalized_precision": 0.5124777121407912,
    "normalized_recall": 0.17749222648381674,
    "selection_accuracy": 0.516,
    "solvable_selection_accuracy": 0.5330578512396694
  },
  "GRMLlama32_scores": {
    "accuracy": 0.5252,
    "precision": 0.47788673203922355,
    "recall": 0.20499613700746847,
    "f1": 0.2869157755616965,
    "true_positives": 4776,
    "true_negatives": 21484,
    "false_positives": 5218,
    "false_negatives": 18522,
    "total": 50000,
    "normalized_precision": 0.501865172879287,
    "normalized_recall": 0.20975363999764599,
    "selection_accuracy": 0.508,
    "solvable_selection_accuracy": 0.5247933884297521
  },
  "GRM_scores": {
    "accuracy": 0.5478,
    "precision": 0.5953436807095344,
    "recall": 0.0921967550862735,
    "f1": 0.15966698877573776,
    "true_positives": 2148,
    "true_negatives": 25242,
    "false_positives": 1460,
    "false_negatives": 21150,
    "total": 50000,
    "normalized_precision": 0.48708099744962396,
    "normalized_recall": 0.07707467710635008,
    "selection_accuracy": 0.516,
    "solvable_selection_accuracy": 0.5330578512396694
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.55528,
    "precision": 0.6372093023255814,
    "recall": 0.10584599536440896,
    "f1": 0.18153710247349822,
    "true_positives": 2466,
    "true_negatives": 25298,
    "false_positives": 1404,
    "false_negatives": 20832,
    "total": 50000,
    "normalized_precision": 0.5132458080988126,
    "normalized_recall": 0.0875157257528584,
    "selection_accuracy": 0.514,
    "solvable_selection_accuracy": 0.53099173553719
  },
  "OffsetBias_scores": {
    "accuracy": 0.51902,
    "precision": 0.46609480812641085,
    "recall": 0.22156408275388445,
    "f1": 0.30035202048119164,
    "true_positives": 5162,
    "true_negatives": 20789,
    "false_positives": 5913,
    "false_negatives": 18136,
    "total": 50000,
    "normalized_precision": 0.49707744723631153,
    "normalized_recall": 0.2336650145934045,
    "selection_accuracy": 0.534,
    "solvable_selection_accuracy": 0.5516528925619835
  },
  "QRM_scores": {
    "accuracy": 0.50116,
    "precision": 0.4717816683831102,
    "recall": 0.5898789595673448,
    "f1": 0.5242618448157472,
    "true_positives": 13743,
    "true_negatives": 11315,
    "false_positives": 15387,
    "false_negatives": 9555,
    "total": 50000,
    "normalized_precision": 0.49836916504273193,
    "normalized_recall": 0.6035527246088114,
    "selection_accuracy": 0.538,
    "solvable_selection_accuracy": 0.5557851239669421
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.47652,
    "precision": 0.4706817811123797,
    "recall": 0.9909005064812431,
    "f1": 0.6382108202250297,
    "true_positives": 23086,
    "true_negatives": 740,
    "false_positives": 25962,
    "false_negatives": 212,
    "total": 50000,
    "normalized_precision": 0.4834931444172284,
    "normalized_recall": 0.9871481632071553,
    "selection_accuracy": 0.554,
    "solvable_selection_accuracy": 0.5723140495867769
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.4687,
    "precision": 0.46712419747620104,
    "recall": 0.996222851746931,
    "f1": 0.6360211002260738,
    "true_positives": 23210,
    "true_negatives": 225,
    "false_positives": 26477,
    "false_negatives": 88,
    "total": 50000,
    "normalized_precision": 0.4816160657431202,
    "normalized_recall": 0.9960235886990293,
    "selection_accuracy": 0.474,
    "solvable_selection_accuracy": 0.4896694214876033
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.65436,
    "precision": 0.6490880253766852,
    "recall": 0.5621083354794403,
    "f1": 0.6024750425541703,
    "true_positives": 13096,
    "true_negatives": 19622,
    "false_positives": 7080,
    "false_negatives": 10202,
    "total": 50000,
    "normalized_precision": 0.5461591456755389,
    "normalized_recall": 0.4124547222271303,
    "selection_accuracy": 0.544,
    "solvable_selection_accuracy": 0.5619834710743802
  },
  "Skyworks_scores": {
    "accuracy": 0.5537,
    "precision": 0.5866690178099101,
    "recall": 0.1428019572495493,
    "f1": 0.22969381062515098,
    "true_positives": 3327,
    "true_negatives": 24358,
    "false_positives": 2344,
    "false_negatives": 19971,
    "total": 50000,
    "normalized_precision": 0.5152500014284117,
    "normalized_recall": 0.11491415661297968,
    "selection_accuracy": 0.534,
    "solvable_selection_accuracy": 0.5516528925619835
  },
  "URM_scores": {
    "accuracy": 0.50098,
    "precision": 0.47374606905752675,
    "recall": 0.6401407846167053,
    "f1": 0.5445152339400134,
    "true_positives": 14914,
    "true_negatives": 10135,
    "false_positives": 16567,
    "false_negatives": 8384,
    "total": 50000,
    "normalized_precision": 0.48470098781206084,
    "normalized_recall": 0.635994440307167,
    "selection_accuracy": 0.456,
    "solvable_selection_accuracy": 0.47107438016528924
  }
}