{
  "GPM_scores": {
    "accuracy": 0.8941651831256376,
    "precision": 0.07903780068728522,
    "recall": 0.02134075620505683,
    "f1": 0.033607305936073056,
    "true_positives": 92,
    "true_negatives": 44610,
    "false_positives": 1072,
    "false_negatives": 4219,
    "total": 49993,
    "normalized_precision": 0.09909173478655768,
    "normalized_recall": 0.013363024821502091,
    "selection_accuracy": 0.096,
    "solvable_selection_accuracy": 0.1307901907356948
  },
  "GRMGemma_scores": {
    "accuracy": 0.8972856199867981,
    "precision": 0.2005813953488372,
    "recall": 0.06402226861517049,
    "f1": 0.09706347810796553,
    "true_positives": 276,
    "true_negatives": 44582,
    "false_positives": 1100,
    "false_negatives": 4035,
    "total": 49993,
    "normalized_precision": 0.15327590915964845,
    "normalized_recall": 0.10965442059210835,
    "selection_accuracy": 0.248,
    "solvable_selection_accuracy": 0.33787465940054495
  },
  "GRMLlama32_scores": {
    "accuracy": 0.8842437941311784,
    "precision": 0.19852941176470587,
    "recall": 0.1127348643006263,
    "f1": 0.14380825565912117,
    "true_positives": 486,
    "true_negatives": 43720,
    "false_positives": 1962,
    "false_negatives": 3825,
    "total": 49993,
    "normalized_precision": 0.19550593223085416,
    "normalized_recall": 0.1497481939104391,
    "selection_accuracy": 0.216,
    "solvable_selection_accuracy": 0.29427792915531337
  },
  "GRM_scores": {
    "accuracy": 0.904186586122057,
    "precision": 0.24602332979851538,
    "recall": 0.053815819995360704,
    "f1": 0.08831366577845451,
    "true_positives": 232,
    "true_negatives": 44971,
    "false_positives": 711,
    "false_negatives": 4079,
    "total": 49993,
    "normalized_precision": 0.1262666835717553,
    "normalized_recall": 0.08766110010632208,
    "selection_accuracy": 0.28,
    "solvable_selection_accuracy": 0.3814713896457766
  },
  "OffsetBias_scores": {
    "accuracy": 0.7192406936971176,
    "precision": 0.14359011947518874,
    "recall": 0.45441892832289493,
    "f1": 0.21822435111952765,
    "true_positives": 1959,
    "true_negatives": 33998,
    "false_positives": 11684,
    "false_negatives": 2352,
    "total": 49993,
    "normalized_precision": 0.19081475185999142,
    "normalized_recall": 0.5174526196107888,
    "selection_accuracy": 0.252,
    "solvable_selection_accuracy": 0.34332425068119893
  },
  "QRM_scores": {
    "accuracy": 0.8236753145440362,
    "precision": 0.17219796215429403,
    "recall": 0.27441428902806775,
    "f1": 0.2116089795188266,
    "true_positives": 1183,
    "true_negatives": 39995,
    "false_positives": 5687,
    "false_negatives": 3128,
    "total": 49993,
    "normalized_precision": 0.23609072056101937,
    "normalized_recall": 0.34838530472158,
    "selection_accuracy": 0.276,
    "solvable_selection_accuracy": 0.3760217983651226
  },
  "QwenPRM_avg_scores": {
    "accuracy": 0.0940931730442262,
    "precision": 0.08653186422892198,
    "recall": 0.9946648109487358,
    "f1": 0.15921284693214516,
    "true_positives": 4288,
    "true_negatives": 416,
    "false_positives": 45266,
    "false_negatives": 23,
    "total": 49993,
    "normalized_precision": 0.11853921538554436,
    "normalized_recall": 0.9965871789746019,
    "selection_accuracy": 0.13,
    "solvable_selection_accuracy": 0.1771117166212534
  },
  "QwenPRM_max_scores": {
    "accuracy": 0.09303302462344729,
    "precision": 0.08648942817406728,
    "recall": 0.9953607051728137,
    "f1": 0.15914991469475556,
    "true_positives": 4291,
    "true_negatives": 360,
    "false_positives": 45322,
    "false_negatives": 20,
    "total": 49993,
    "normalized_precision": 0.11848279319838131,
    "normalized_recall": 0.9974403978114721,
    "selection_accuracy": 0.096,
    "solvable_selection_accuracy": 0.1307901907356948
  },
  "QwenPRM_min_scores": {
    "accuracy": 0.27679875182525554,
    "precision": 0.09243331797470947,
    "recall": 0.8376246810484806,
    "f1": 0.1664937639763008,
    "true_positives": 3611,
    "true_negatives": 10227,
    "false_positives": 35455,
    "false_negatives": 700,
    "total": 49993,
    "normalized_precision": 0.12199599656625724,
    "normalized_recall": 0.8319981702671765,
    "selection_accuracy": 0.13,
    "solvable_selection_accuracy": 0.1771117166212534
  },
  "Skyworks_scores": {
    "accuracy": 0.9029864180985337,
    "precision": 0.23076923076923078,
    "recall": 0.05358385525400139,
    "f1": 0.08697289156626506,
    "true_positives": 231,
    "true_negatives": 44912,
    "false_positives": 770,
    "false_negatives": 4080,
    "total": 49993,
    "normalized_precision": 0.15847015378956814,
    "normalized_recall": 0.08258321594399293,
    "selection_accuracy": 0.198,
    "solvable_selection_accuracy": 0.26975476839237056
  },
  "URM_scores": {
    "accuracy": 0.4164182985617986,
    "precision": 0.09568101990373358,
    "recall": 0.6824402690790999,
    "f1": 0.16783136997632564,
    "true_positives": 2942,
    "true_negatives": 17876,
    "false_positives": 27806,
    "false_negatives": 1369,
    "total": 49993,
    "normalized_precision": 0.12910663012363233,
    "normalized_recall": 0.7004232697307617,
    "selection_accuracy": 0.084,
    "solvable_selection_accuracy": 0.11444141689373297
  }
}