{
  "ArmorRM_scores": {
    "accuracy": 0.4502086230876217,
    "precision": 0.8275611847747213,
    "recall": 0.4224474676724138,
    "f1": 0.5593579311113588,
    "true_positives": 25090,
    "true_negatives": 7280,
    "false_positives": 5228,
    "false_negatives": 34302,
    "total": 71900,
    "normalized_precision": 0.7779163557983383,
    "normalized_recall": 0.42468525212559566,
    "selection_accuracy": 0.8428372739916551,
    "solvable_selection_accuracy": 0.8782608695652174
  },
  "EurusPRMStage1_avg_scores": {
    "accuracy": 0.7133657858136301,
    "precision": 0.8402913047293147,
    "recall": 0.8062365301724138,
    "f1": 0.8229117437294311,
    "true_positives": 47884,
    "true_negatives": 3407,
    "false_positives": 9101,
    "false_negatives": 11508,
    "total": 71900,
    "normalized_precision": 0.861679217483093,
    "normalized_recall": 0.7960329995859398,
    "selection_accuracy": 0.8372739916550765,
    "solvable_selection_accuracy": 0.8724637681159421
  },
  "EurusPRMStage1_max_scores": {
    "accuracy": 0.17603616133518776,
    "precision": 0.9515151515151515,
    "recall": 0.0026434536637931034,
    "f1": 0.005272260187719327,
    "true_positives": 157,
    "true_negatives": 12500,
    "false_positives": 8,
    "false_negatives": 59235,
    "total": 71900,
    "normalized_precision": 0.14347826086956522,
    "normalized_recall": 0.0023473764920159423,
    "selection_accuracy": 0.8219749652294854,
    "solvable_selection_accuracy": 0.8565217391304348
  },
  "EurusPRMStage1_min_scores": {
    "accuracy": 0.8258970792767732,
    "precision": 0.8260301032189856,
    "recall": 0.9997979525862069,
    "f1": 0.904645104282515,
    "true_positives": 59380,
    "true_negatives": 2,
    "false_positives": 12506,
    "false_negatives": 12,
    "total": 71900,
    "normalized_precision": 0.8607458644415171,
    "normalized_recall": 0.9997626096252364,
    "selection_accuracy": 0.8442280945757997,
    "solvable_selection_accuracy": 0.8797101449275362
  },
  "EurusPRMStage2_avg_scores": {
    "accuracy": 0.8249930458970792,
    "precision": 0.8270018023556369,
    "recall": 0.9966157058189655,
    "f1": 0.9039208948955828,
    "true_positives": 59191,
    "true_negatives": 126,
    "false_positives": 12382,
    "false_negatives": 201,
    "total": 71900,
    "normalized_precision": 0.8609912245474303,
    "normalized_recall": 0.9959902480361879,
    "selection_accuracy": 0.8386648122392212,
    "solvable_selection_accuracy": 0.8739130434782608
  },
  "EurusPRMStage2_max_scores": {
    "accuracy": 0.18842837273991656,
    "precision": 0.8909774436090225,
    "recall": 0.019952182112068964,
    "f1": 0.03903033496920391,
    "true_positives": 1185,
    "true_negatives": 12363,
    "false_positives": 145,
    "false_negatives": 58207,
    "total": 71900,
    "normalized_precision": 0.3638462656082794,
    "normalized_recall": 0.02072841214368507,
    "selection_accuracy": 0.8400556328233658,
    "solvable_selection_accuracy": 0.8753623188405797
  },
  "EurusPRMStage2_min_scores": {
    "accuracy": 0.8259527121001391,
    "precision": 0.826130513427021,
    "recall": 0.9996969288793104,
    "f1": 0.9046639545336808,
    "true_positives": 59374,
    "true_negatives": 12,
    "false_positives": 12496,
    "false_negatives": 18,
    "total": 71900,
    "normalized_precision": 0.8607934716620129,
    "normalized_recall": 0.9996313260535551,
    "selection_accuracy": 0.847009735744089,
    "solvable_selection_accuracy": 0.8826086956521739
  },
  "GPM_scores": {
    "accuracy": 0.18741307371349095,
    "precision": 0.8286879673691366,
    "recall": 0.02052464978448276,
    "f1": 0.040057177595583526,
    "true_positives": 1219,
    "true_negatives": 12256,
    "false_positives": 252,
    "false_negatives": 58173,
    "total": 71900,
    "normalized_precision": 0.7529227053140093,
    "normalized_recall": 0.021338845297803933,
    "selection_accuracy": 0.8414464534075105,
    "solvable_selection_accuracy": 0.8768115942028986
  },
  "GRMGemma_scores": {
    "accuracy": 0.5199860917941586,
    "precision": 0.831005029137064,
    "recall": 0.5258283943965517,
    "f1": 0.644096810452394,
    "true_positives": 31230,
    "true_negatives": 6157,
    "false_positives": 6351,
    "false_negatives": 28162,
    "total": 71900,
    "normalized_precision": 0.8636742464507571,
    "normalized_recall": 0.521298121858454,
    "selection_accuracy": 0.8400556328233658,
    "solvable_selection_accuracy": 0.8753623188405797
  },
  "GRMLlama32_scores": {
    "accuracy": 0.3300695410292072,
    "precision": 0.8560461870321026,
    "recall": 0.22718547952586207,
    "f1": 0.3590760305506028,
    "true_positives": 13493,
    "true_negatives": 10239,
    "false_positives": 2269,
    "false_negatives": 45899,
    "total": 71900,
    "normalized_precision": 0.7122430020292697,
    "normalized_recall": 0.22450592077072956,
    "selection_accuracy": 0.8358831710709318,
    "solvable_selection_accuracy": 0.8710144927536232
  },
  "GRM_scores": {
    "accuracy": 0.36417246175243395,
    "precision": 0.8804384110381662,
    "recall": 0.2664500269396552,
    "f1": 0.40909443424760233,
    "true_positives": 15825,
    "true_negatives": 10359,
    "false_positives": 2149,
    "false_negatives": 43567,
    "total": 71900,
    "normalized_precision": 0.7639665850166639,
    "normalized_recall": 0.2523718835908313,
    "selection_accuracy": 0.8372739916550765,
    "solvable_selection_accuracy": 0.8724637681159421
  },
  "INFORM_scores": {
    "accuracy": 0.5488873435326843,
    "precision": 0.842589533080853,
    "recall": 0.5581559806034483,
    "f1": 0.6714944042132982,
    "true_positives": 33150,
    "true_negatives": 6315,
    "false_positives": 6193,
    "false_negatives": 26242,
    "total": 71900,
    "normalized_precision": 0.8664437600492706,
    "normalized_recall": 0.5481085930431401,
    "selection_accuracy": 0.847009735744089,
    "solvable_selection_accuracy": 0.8826086956521739
  },
  "InternLM2RewardModel_scores": {
    "accuracy": 0.4912239221140473,
    "precision": 0.8792352452202826,
    "recall": 0.4452283135775862,
    "f1": 0.5911229838935026,
    "true_positives": 26443,
    "true_negatives": 8876,
    "false_positives": 3632,
    "false_negatives": 32949,
    "total": 71900,
    "normalized_precision": 0.8695666460142488,
    "normalized_recall": 0.4239221408426394,
    "selection_accuracy": 0.8428372739916551,
    "solvable_selection_accuracy": 0.8782608695652174
  },
  "LDLRewardGemma_scores": {
    "accuracy": 0.6653963838664813,
    "precision": 0.857617100522246,
    "recall": 0.7133620689655172,
    "f1": 0.7788664816074417,
    "true_positives": 42368,
    "true_negatives": 5474,
    "false_positives": 7034,
    "false_negatives": 17024,
    "total": 71900,
    "normalized_precision": 0.8608484483221568,
    "normalized_recall": 0.6977243102021046,
    "selection_accuracy": 0.8191933240611962,
    "solvable_selection_accuracy": 0.8536231884057971
  },
  "OffsetBias_scores": {
    "accuracy": 0.5856050069541029,
    "precision": 0.8390030467550913,
    "recall": 0.6166655441810345,
    "f1": 0.7108544810519676,
    "true_positives": 36625,
    "true_negatives": 5480,
    "false_positives": 7028,
    "false_negatives": 22767,
    "total": 71900,
    "normalized_precision": 0.8629155296833989,
    "normalized_recall": 0.6089338286215652,
    "selection_accuracy": 0.8442280945757997,
    "solvable_selection_accuracy": 0.8797101449275362
  },
  "QRMGemma_scores": {
    "accuracy": 0.6579415855354659,
    "precision": 0.8390824758341129,
    "recall": 0.7249292834051724,
    "f1": 0.7778400057811822,
    "true_positives": 43055,
    "true_negatives": 4251,
    "false_positives": 8257,
    "false_negatives": 16337,
    "total": 71900,
    "normalized_precision": 0.8642225223142411,
    "normalized_recall": 0.7153347339000629,
    "selection_accuracy": 0.8414464534075105,
    "solvable_selection_accuracy": 0.8768115942028986
  },
  "QRM_scores": {
    "accuracy": 0.59105702364395,
    "precision": 0.8281144007527517,
    "recall": 0.6371901939655172,
    "f1": 0.7202139098495589,
    "true_positives": 37844,
    "true_negatives": 4653,
    "false_positives": 7855,
    "false_negatives": 21548,
    "total": 71900,
    "normalized_precision": 0.8622942028680426,
    "normalized_recall": 0.6348574118945775,
    "selection_accuracy": 0.8456189151599444,
    "solvable_selection_accuracy": 0.881159420289855
  },
  "Qwen72B_scores": {
    "accuracy": 0.8307093184979137,
    "precision": 0.8721040189125295,
    "recall": 0.9316911368534483,
    "f1": 0.900913368391918,
    "true_positives": 55335,
    "true_negatives": 4393,
    "false_positives": 8115,
    "false_negatives": 4057,
    "total": 71900,
    "normalized_precision": 0.8763839325475962,
    "normalized_recall": 0.9004881297533568,
    "selection_accuracy": 0.8692628650904033,
    "solvable_selection_accuracy": 0.9057971014492754
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.8261752433936023,
    "precision": 0.8262963066046926,
    "recall": 0.9997306034482759,
    "f1": 0.9047771428571428,
    "true_positives": 59376,
    "true_negatives": 26,
    "false_positives": 12482,
    "false_negatives": 16,
    "total": 71900,
    "normalized_precision": 0.8608257923055281,
    "normalized_recall": 0.9989292333220632,
    "selection_accuracy": 0.8386648122392212,
    "solvable_selection_accuracy": 0.8739130434782608
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.8260361613351878,
    "precision": 0.8260452307435534,
    "recall": 0.9999831627155172,
    "f1": 0.9047299870515653,
    "true_positives": 59391,
    "true_negatives": 1,
    "false_positives": 12507,
    "false_negatives": 1,
    "total": 71900,
    "normalized_precision": 0.8607587468891822,
    "normalized_recall": 0.9999816547422492,
    "selection_accuracy": 0.8289290681502086,
    "solvable_selection_accuracy": 0.863768115942029
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.7645757997218359,
    "precision": 0.9096012500723422,
    "recall": 0.7938948006465517,
    "f1": 0.8478184646090497,
    "true_positives": 47151,
    "true_negatives": 7822,
    "false_positives": 4686,
    "false_negatives": 12241,
    "total": 71900,
    "normalized_precision": 0.8729921045702749,
    "normalized_recall": 0.7270969134986766,
    "selection_accuracy": 0.8456189151599444,
    "solvable_selection_accuracy": 0.881159420289855
  },
  "SkyworksGemma_scores": {
    "accuracy": 0.5389986091794159,
    "precision": 0.8468756608162402,
    "recall": 0.5394497575431034,
    "f1": 0.6590759483255163,
    "true_positives": 32039,
    "true_negatives": 6715,
    "false_positives": 5793,
    "false_negatives": 27353,
    "total": 71900,
    "normalized_precision": 0.867791058304462,
    "normalized_recall": 0.5297546101081386,
    "selection_accuracy": 0.8456189151599444,
    "solvable_selection_accuracy": 0.881159420289855
  },
  "Skyworks_scores": {
    "accuracy": 0.48424200278164115,
    "precision": 0.8734473869228967,
    "recall": 0.43926791487068967,
    "f1": 0.5845554049360864,
    "true_positives": 26089,
    "true_negatives": 8728,
    "false_positives": 3780,
    "false_negatives": 33303,
    "total": 71900,
    "normalized_precision": 0.867733731868921,
    "normalized_recall": 0.42335480026746647,
    "selection_accuracy": 0.8511821974965229,
    "solvable_selection_accuracy": 0.8869565217391304
  },
  "URM_scores": {
    "accuracy": 0.548414464534075,
    "precision": 0.8501131368826237,
    "recall": 0.5503434806034483,
    "f1": 0.66814525607874,
    "true_positives": 32686,
    "true_negatives": 6745,
    "false_positives": 5763,
    "false_negatives": 26706,
    "total": 71900,
    "normalized_precision": 0.8688000865797377,
    "normalized_recall": 0.5398345825260612,
    "selection_accuracy": 0.8456189151599444,
    "solvable_selection_accuracy": 0.881159420289855
  }
}