{
  "ArmorRM_scores": {
    "accuracy": 0.6255702280912365,
    "precision": 0.872282965435757,
    "recall": 0.6560917464840389,
    "f1": 0.7488970469841882,
    "true_positives": 46511,
    "true_negatives": 5599,
    "false_positives": 6810,
    "false_negatives": 24380,
    "total": 83300,
    "normalized_precision": 0.8645939613818486,
    "normalized_recall": 0.6313834781447881,
    "selection_accuracy": 0.8883553421368547,
    "solvable_selection_accuracy": 0.891566265060241
  },
  "DecisionTreeReward27B_scores": {
    "accuracy": 0.5733973589435775,
    "precision": 0.8459731872003131,
    "recall": 0.6097388949231919,
    "f1": 0.7086878822159919,
    "true_positives": 43225,
    "true_negatives": 4539,
    "false_positives": 7870,
    "false_negatives": 27666,
    "total": 83300,
    "normalized_precision": 0.8590762216295965,
    "normalized_recall": 0.6095121855557589,
    "selection_accuracy": 0.8931572629051621,
    "solvable_selection_accuracy": 0.8963855421686747
  },
  "DecisionTreeReward8B_scores": {
    "accuracy": 0.5808403361344537,
    "precision": 0.8438437864393172,
    "recall": 0.6227024587042078,
    "f1": 0.7166001103860263,
    "true_positives": 44144,
    "true_negatives": 4240,
    "false_positives": 8169,
    "false_negatives": 26747,
    "total": 83300,
    "normalized_precision": 0.8571853671900451,
    "normalized_recall": 0.6243325261478032,
    "selection_accuracy": 0.8943577430972389,
    "solvable_selection_accuracy": 0.8975903614457831
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.8469507803121249,
    "precision": 0.8522049915192634,
    "recall": 0.992241610359566,
    "f1": 0.9169072742796436,
    "true_positives": 70341,
    "true_negatives": 210,
    "false_positives": 12199,
    "false_negatives": 550,
    "total": 83300,
    "normalized_precision": 0.8548144889336468,
    "normalized_recall": 0.9909787976100681,
    "selection_accuracy": 0.8523409363745498,
    "solvable_selection_accuracy": 0.8554216867469879
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.15016806722689074,
    "precision": 0.8424657534246576,
    "recall": 0.0017350580468606734,
    "f1": 0.0034629840787195405,
    "true_positives": 123,
    "true_negatives": 12386,
    "false_positives": 23,
    "false_negatives": 70768,
    "total": 83300,
    "normalized_precision": 0.1065863453815261,
    "normalized_recall": 0.0016007906955645436,
    "selection_accuracy": 0.8571428571428571,
    "solvable_selection_accuracy": 0.8602409638554217
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.8510444177671068,
    "precision": 0.8510426295633801,
    "recall": 1.0,
    "f1": 0.9195278552435306,
    "true_positives": 70891,
    "true_negatives": 1,
    "false_positives": 12408,
    "false_negatives": 0,
    "total": 83300,
    "normalized_precision": 0.8541084337349437,
    "normalized_recall": 1.0,
    "selection_accuracy": 0.879951980792317,
    "solvable_selection_accuracy": 0.8831325301204819
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.8508523409363745,
    "precision": 0.8510730283779078,
    "recall": 0.9996755582514001,
    "f1": 0.9194084068500259,
    "true_positives": 70868,
    "true_negatives": 8,
    "false_positives": 12401,
    "false_negatives": 23,
    "total": 83300,
    "normalized_precision": 0.854157580228152,
    "normalized_recall": 0.9996892446529928,
    "selection_accuracy": 0.879951980792317,
    "solvable_selection_accuracy": 0.8831325301204819
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.15115246098439375,
    "precision": 0.9026548672566371,
    "recall": 0.0028776572484518485,
    "f1": 0.005737024902625251,
    "true_positives": 204,
    "true_negatives": 12387,
    "false_positives": 22,
    "false_negatives": 70687,
    "total": 83300,
    "normalized_precision": 0.16265060240963855,
    "normalized_recall": 0.002611716013681815,
    "selection_accuracy": 0.8691476590636255,
    "solvable_selection_accuracy": 0.8722891566265061
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.8510444177671068,
    "precision": 0.8510426295633801,
    "recall": 1.0,
    "f1": 0.9195278552435306,
    "true_positives": 70891,
    "true_negatives": 1,
    "false_positives": 12408,
    "false_negatives": 0,
    "total": 83300,
    "normalized_precision": 0.8541084337349437,
    "normalized_recall": 1.0,
    "selection_accuracy": 0.8835534213685474,
    "solvable_selection_accuracy": 0.8867469879518072
  },
  "GPM_scores": {
    "accuracy": 0.16224489795918368,
    "precision": 0.8495575221238938,
    "recall": 0.018958683048623942,
    "f1": 0.03708967477543361,
    "true_positives": 1344,
    "true_negatives": 12171,
    "false_positives": 238,
    "false_negatives": 69547,
    "total": 83300,
    "normalized_precision": 0.7212248995983933,
    "normalized_recall": 0.01844088160561274,
    "selection_accuracy": 0.8475390156062425,
    "solvable_selection_accuracy": 0.8506024096385543
  },
  "GRMGemma_scores": {
    "accuracy": 0.5348379351740696,
    "precision": 0.8442687916372127,
    "recall": 0.5559662016334936,
    "f1": 0.6704373415891268,
    "true_positives": 39413,
    "true_negatives": 5139,
    "false_positives": 7270,
    "false_negatives": 31478,
    "total": 83300,
    "normalized_precision": 0.859288711386906,
    "normalized_recall": 0.5515789696347438,
    "selection_accuracy": 0.8259303721488596,
    "solvable_selection_accuracy": 0.8289156626506025
  },
  "GRMLlama32_scores": {
    "accuracy": 0.56,
    "precision": 0.8550953101989173,
    "recall": 0.5815265689579777,
    "f1": 0.6922637739080788,
    "true_positives": 41225,
    "true_negatives": 5423,
    "false_positives": 6986,
    "false_negatives": 29666,
    "total": 83300,
    "normalized_precision": 0.8598551066909418,
    "normalized_recall": 0.5743997029810446,
    "selection_accuracy": 0.8823529411764706,
    "solvable_selection_accuracy": 0.8855421686746988
  },
  "GRM_scores": {
    "accuracy": 0.2887635054021609,
    "precision": 0.8525582803511959,
    "recall": 0.19861477479510797,
    "f1": 0.3221746790838157,
    "true_positives": 14080,
    "true_negatives": 9974,
    "false_positives": 2435,
    "false_negatives": 56811,
    "total": 83300,
    "normalized_precision": 0.7847814471010027,
    "normalized_recall": 0.1898885972110955,
    "selection_accuracy": 0.8871548619447779,
    "solvable_selection_accuracy": 0.8903614457831325
  },
  "INFORM_scores": {
    "accuracy": 0.5372869147659064,
    "precision": 0.8531100582931249,
    "recall": 0.5511983185453725,
    "f1": 0.6697002416576687,
    "true_positives": 39075,
    "true_negatives": 5681,
    "false_positives": 6728,
    "false_negatives": 31816,
    "total": 83300,
    "normalized_precision": 0.8632149599031681,
    "normalized_recall": 0.5382128423017398,
    "selection_accuracy": 0.9027611044417767,
    "solvable_selection_accuracy": 0.9060240963855422
  },
  "InternLM2Reward7B_scores": {
    "accuracy": 0.4467827130852341,
    "precision": 0.8549070100143061,
    "recall": 0.42147804375731757,
    "f1": 0.5646016194102474,
    "true_positives": 29879,
    "true_negatives": 7338,
    "false_positives": 5071,
    "false_negatives": 41012,
    "total": 83300,
    "normalized_precision": 0.8734857852440909,
    "normalized_recall": 0.4181724062020418,
    "selection_accuracy": 0.8931572629051621,
    "solvable_selection_accuracy": 0.8963855421686747
  },
  "InternLM2RewardModel_scores": {
    "accuracy": 0.4454741896758703,
    "precision": 0.8410616145157281,
    "recall": 0.42958908747231667,
    "f1": 0.5687021475256769,
    "true_positives": 30454,
    "true_negatives": 6654,
    "false_positives": 5755,
    "false_negatives": 40437,
    "total": 83300,
    "normalized_precision": 0.8697624246650004,
    "normalized_recall": 0.43511270021026277,
    "selection_accuracy": 0.8787515006002401,
    "solvable_selection_accuracy": 0.8819277108433735
  },
  "LDLRewardGemma_scores": {
    "accuracy": 0.34728691476590634,
    "precision": 0.8606671906383722,
    "recall": 0.27804657855016857,
    "f1": 0.42030855181090276,
    "true_positives": 19711,
    "true_negatives": 9218,
    "false_positives": 3191,
    "false_negatives": 51180,
    "total": 83300,
    "normalized_precision": 0.8479249128862102,
    "normalized_recall": 0.2835000591954594,
    "selection_accuracy": 0.7935174069627852,
    "solvable_selection_accuracy": 0.7963855421686747
  },
  "OffsetBias_scores": {
    "accuracy": 0.5974909963985594,
    "precision": 0.8451901400643039,
    "recall": 0.6452158948244489,
    "f1": 0.7317873113135854,
    "true_positives": 45740,
    "true_negatives": 4031,
    "false_positives": 8378,
    "false_negatives": 25151,
    "total": 83300,
    "normalized_precision": 0.8550072541086092,
    "normalized_recall": 0.6464926922226598,
    "selection_accuracy": 0.8811524609843937,
    "solvable_selection_accuracy": 0.8843373493975903
  },
  "QRMGemma_scores": {
    "accuracy": 0.5787394957983193,
    "precision": 0.8466976564013171,
    "recall": 0.6166509147846694,
    "f1": 0.7135919556647432,
    "true_positives": 43715,
    "true_negatives": 4494,
    "false_positives": 7915,
    "false_negatives": 27176,
    "total": 83300,
    "normalized_precision": 0.8588635594927326,
    "normalized_recall": 0.6147009192587956,
    "selection_accuracy": 0.8967587034813925,
    "solvable_selection_accuracy": 0.9
  },
  "QRM_scores": {
    "accuracy": 0.586890756302521,
    "precision": 0.844968131182267,
    "recall": 0.6302069374109548,
    "f1": 0.721954687954494,
    "true_positives": 44676,
    "true_negatives": 4212,
    "false_positives": 8197,
    "false_negatives": 26215,
    "total": 83300,
    "normalized_precision": 0.8568772726980278,
    "normalized_recall": 0.628446097871175,
    "selection_accuracy": 0.8907563025210085,
    "solvable_selection_accuracy": 0.8939759036144578
  },
  "Qwen72B_scores": {
    "accuracy": 0.8339135654261705,
    "precision": 0.8559415082098119,
    "recall": 0.9677109929328124,
    "f1": 0.9084011414270486,
    "true_positives": 68602,
    "true_negatives": 863,
    "false_positives": 11546,
    "false_negatives": 2289,
    "total": 83300,
    "normalized_precision": 0.8600125063877478,
    "normalized_recall": 0.9663186328770719,
    "selection_accuracy": 0.8667466986794717,
    "solvable_selection_accuracy": 0.8698795180722891
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.8508523409363745,
    "precision": 0.8513261786585585,
    "recall": 0.9992523733619219,
    "f1": 0.9193770279039585,
    "true_positives": 70838,
    "true_negatives": 38,
    "false_positives": 12371,
    "false_negatives": 53,
    "total": 83300,
    "normalized_precision": 0.8541079486011272,
    "normalized_recall": 0.9989083459720528,
    "selection_accuracy": 0.8763505402160864,
    "solvable_selection_accuracy": 0.8795180722891566
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.8510204081632653,
    "precision": 0.8510474818416471,
    "recall": 0.9999576815110521,
    "f1": 0.9195127962331211,
    "true_positives": 70888,
    "true_negatives": 2,
    "false_positives": 12407,
    "false_negatives": 3,
    "total": 83300,
    "normalized_precision": 0.8541085554338607,
    "normalized_recall": 0.9998031628679219,
    "selection_accuracy": 0.8235294117647058,
    "solvable_selection_accuracy": 0.8265060240963855
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.6272629051620648,
    "precision": 0.8834353466528082,
    "recall": 0.6474446685757007,
    "f1": 0.7472506003500347,
    "true_positives": 45898,
    "true_negatives": 6353,
    "false_positives": 6056,
    "false_negatives": 24993,
    "total": 83300,
    "normalized_precision": 0.8680672897265335,
    "normalized_recall": 0.6258937506284549,
    "selection_accuracy": 0.8775510204081632,
    "solvable_selection_accuracy": 0.880722891566265
  },
  "SkyworksGemma_scores": {
    "accuracy": 0.522172869147659,
    "precision": 0.8546753068954502,
    "recall": 0.5283745468395142,
    "f1": 0.653033116277448,
    "true_positives": 37457,
    "true_negatives": 6040,
    "false_positives": 6369,
    "false_negatives": 33434,
    "total": 83300,
    "normalized_precision": 0.8657401765773676,
    "normalized_recall": 0.5130293106342031,
    "selection_accuracy": 0.8691476590636255,
    "solvable_selection_accuracy": 0.8722891566265061
  },
  "Skyworks_scores": {
    "accuracy": 0.41247298919567826,
    "precision": 0.8548793895104443,
    "recall": 0.3729387369341665,
    "f1": 0.5193238850093795,
    "true_positives": 26438,
    "true_negatives": 7921,
    "false_positives": 4488,
    "false_negatives": 44453,
    "total": 83300,
    "normalized_precision": 0.8698295010388781,
    "normalized_recall": 0.3612132893320998,
    "selection_accuracy": 0.8811524609843937,
    "solvable_selection_accuracy": 0.8843373493975903
  },
  "URM_scores": {
    "accuracy": 0.816530612244898,
    "precision": 0.8510606060606061,
    "recall": 0.9508118096796491,
    "f1": 0.8981751071016917,
    "true_positives": 67404,
    "true_negatives": 613,
    "false_positives": 11796,
    "false_negatives": 3487,
    "total": 83300,
    "normalized_precision": 0.8553396138930492,
    "normalized_recall": 0.9492586659008684,
    "selection_accuracy": 0.8475390156062425,
    "solvable_selection_accuracy": 0.8506024096385543
  }
}