{
  "armor_rm_score": {
    "accuracy": 0.31565714285714286,
    "precision": 0.07444146470336069,
    "recall": 0.7155725922376617,
    "f1": 0.13485398493796394,
    "true_positives": 7467,
    "true_negatives": 36725,
    "false_positives": 92840,
    "false_negatives": 2968,
    "total": 140000,
    "normalized_precision": 0.2181636540162902,
    "normalized_recall": 0.6684020754688,
    "selection_accuracy": 0.05,
    "solvable_selection_accuracy": 0.175
  },
  "gpm_scores": {
    "accuracy": 0.8323,
    "precision": 0.005084617135918646,
    "recall": 0.0064206995687589846,
    "f1": 0.005675080467558869,
    "true_positives": 67,
    "true_negatives": 116455,
    "false_positives": 13110,
    "false_negatives": 10368,
    "total": 140000,
    "normalized_precision": 0.018504665932787788,
    "normalized_recall": 0.022641689918437475,
    "selection_accuracy": 0.02857142857142857,
    "solvable_selection_accuracy": 0.1
  },
  "grm_gemma_scores": {
    "accuracy": 0.57955,
    "precision": 0.10207066557107641,
    "recall": 0.5952084331576426,
    "f1": 0.1742582591007926,
    "true_positives": 6211,
    "true_negatives": 74926,
    "false_positives": 54639,
    "false_negatives": 4224,
    "total": 140000,
    "normalized_precision": 0.2973752855721355,
    "normalized_recall": 0.5110814954951673,
    "selection_accuracy": 0.10714285714285714,
    "solvable_selection_accuracy": 0.375
  },
  "grm_llama32_scores": {
    "accuracy": 0.47182142857142856,
    "precision": 0.10844636251541306,
    "recall": 0.8428366075706756,
    "f1": 0.19216693068225266,
    "true_positives": 8795,
    "true_negatives": 57260,
    "false_positives": 72305,
    "false_negatives": 1640,
    "total": 140000,
    "normalized_precision": 0.2746248107884185,
    "normalized_recall": 0.7306193000522738,
    "selection_accuracy": 0.10714285714285714,
    "solvable_selection_accuracy": 0.375
  },
  "grm_scores": {
    "accuracy": 0.5204785714285715,
    "precision": 0.09578806285111358,
    "recall": 0.6437949209391471,
    "f1": 0.16676389181943427,
    "true_positives": 6718,
    "true_negatives": 66149,
    "false_positives": 63416,
    "false_negatives": 3717,
    "total": 140000,
    "normalized_precision": 0.2269558118908243,
    "normalized_recall": 0.5456856869491205,
    "selection_accuracy": 0.06428571428571428,
    "solvable_selection_accuracy": 0.225
  },
  "internlm_scores": {
    "accuracy": 0.30301428571428574,
    "precision": 0.08189957107078771,
    "recall": 0.8179204599904168,
    "f1": 0.14889051706091688,
    "true_positives": 8535,
    "true_negatives": 33887,
    "false_positives": 95678,
    "false_negatives": 1900,
    "total": 140000,
    "normalized_precision": 0.25870805965298194,
    "normalized_recall": 0.7949721256866207,
    "selection_accuracy": 0.07142857142857142,
    "solvable_selection_accuracy": 0.25
  },
  "offset_bias_scores": {
    "accuracy": 0.17923571428571428,
    "precision": 0.0822523632059628,
    "recall": 0.9856252994729277,
    "f1": 0.15183389062350067,
    "true_positives": 10285,
    "true_negatives": 14808,
    "false_positives": 114757,
    "false_negatives": 150,
    "total": 140000,
    "normalized_precision": 0.26313783950689956,
    "normalized_recall": 0.9127272184514832,
    "selection_accuracy": 0.07142857142857142,
    "solvable_selection_accuracy": 0.25
  },
  "qrm_scores": {
    "accuracy": 0.22767857142857142,
    "precision": 0.08733229698219053,
    "recall": 0.9906085289889794,
    "f1": 0.1605136685843834,
    "true_positives": 10337,
    "true_negatives": 21538,
    "false_positives": 108027,
    "false_negatives": 98,
    "total": 140000,
    "normalized_precision": 0.26956298496846326,
    "normalized_recall": 0.9855411821672397,
    "selection_accuracy": 0.09285714285714286,
    "solvable_selection_accuracy": 0.325
  },
  "skyworks_scores": {
    "accuracy": 0.3003142857142857,
    "precision": 0.09153871283894152,
    "recall": 0.9398179204599905,
    "f1": 0.16682827251849963,
    "true_positives": 9807,
    "true_negatives": 32237,
    "false_positives": 97328,
    "false_negatives": 628,
    "total": 140000,
    "normalized_precision": 0.2752940683809898,
    "normalized_recall": 0.8780046761903876,
    "selection_accuracy": 0.08571428571428572,
    "solvable_selection_accuracy": 0.3
  },
  "urm_scores": {
    "accuracy": 0.9254142857142857,
    "precision": 0.0,
    "recall": 0.0,
    "f1": 0,
    "true_positives": 0,
    "true_negatives": 129558,
    "false_positives": 7,
    "false_negatives": 10435,
    "total": 140000,
    "normalized_precision": 0.0,
    "normalized_recall": 0.0,
    "selection_accuracy": 0.14285714285714285,
    "solvable_selection_accuracy": 0.5
  }
}