{
  "model": "llama-4-scout-17b-16e-instruct",
  "dataset": "H2",
  "n_samples": 162,
  "n_harmful": 81,
  "n_benign": 81,
  "original_se_by_tau": {
    "0.1": {
      "auroc": 0.6912818167962201,
      "fnr_at_5fpr": 0.654320987654321,
      "fnr_ci_lower": 0.5458937528984922,
      "fnr_ci_upper": 0.7487735046840958,
      "threshold": 0.9709505944546686,
      "fpr_used": 0.037037037037037035
    },
    "0.2": {
      "auroc": 0.6172839506172839,
      "fnr_at_5fpr": 0.7654320987654322,
      "fnr_ci_lower": 0.6624527035476533,
      "fnr_ci_upper": 0.8443749794943978,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0
    },
    "0.3": {
      "auroc": 0.5864197530864197,
      "fnr_at_5fpr": 0.8271604938271605,
      "fnr_ci_lower": 0.7305386248522876,
      "fnr_ci_upper": 0.8941559612227987,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0
    },
    "0.4": {
      "auroc": 0.5679012345679012,
      "fnr_at_5fpr": 0.8641975308641976,
      "fnr_ci_lower": 0.7729693623122125,
      "fnr_ci_upper": 0.9224453655826947,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0
    }
  },
  "best_tau": 0.1,
  "length_models": {
    "0.1": {
      "r2": 0.10272718440690964,
      "intercept": 1.4726338930768699,
      "slope": -0.17382972731599464
    },
    "0.2": {
      "r2": 1.0,
      "intercept": 0.0,
      "slope": 0.0
    },
    "0.3": {
      "r2": 1.0,
      "intercept": 0.0,
      "slope": 0.0
    },
    "0.4": {
      "r2": 1.0,
      "intercept": 0.0,
      "slope": 0.0
    }
  },
  "residual_se_by_tau": {
    "0.1": {
      "auroc": 0.6300868770004573,
      "fnr_at_5fpr": 0.691358024691358,
      "fnr_ci_lower": 0.5840194691734771,
      "fnr_ci_upper": 0.7813679302289317,
      "threshold": 0.6678249301156158,
      "fpr_used": 0.04938271604938271,
      "n_samples_evaluated": 162,
      "auroc_drop": 0.06119493979576285,
      "fnr_increase": 0.03703703703703698,
      "h3_supported": false
    },
    "0.2": {
      "auroc": 0.6172839506172839,
      "fnr_at_5fpr": 0.7654320987654322,
      "fnr_ci_lower": 0.6624527035476533,
      "fnr_ci_upper": 0.8443749794943978,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0,
      "n_samples_evaluated": 162,
      "auroc_drop": 0.0,
      "fnr_increase": 0.0,
      "h3_supported": false
    },
    "0.3": {
      "auroc": 0.5864197530864197,
      "fnr_at_5fpr": 0.8271604938271605,
      "fnr_ci_lower": 0.7305386248522876,
      "fnr_ci_upper": 0.8941559612227987,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0,
      "n_samples_evaluated": 162,
      "auroc_drop": 0.0,
      "fnr_increase": 0.0,
      "h3_supported": false
    },
    "0.4": {
      "auroc": 0.5679012345679012,
      "fnr_at_5fpr": 0.8641975308641976,
      "fnr_ci_lower": 0.7729693623122125,
      "fnr_ci_upper": 0.9224453655826947,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0,
      "n_samples_evaluated": 162,
      "auroc_drop": 0.0,
      "fnr_increase": 0.0,
      "h3_supported": false
    }
  },
  "h3_supported": false,
  "baselines": {
    "avg_pairwise_bertscore": {
      "auroc": 0.5057155921353451,
      "fnr_at_5fpr": 0.7407407407407407,
      "fnr_ci_lower": 0.6359565111747224,
      "fnr_ci_upper": 0.8237244106541145
    },
    "embedding_variance": {
      "auroc": 0.6837372351775645,
      "fnr_at_5fpr": 0.6049382716049383,
      "fnr_ci_lower": 0.4960547732360617,
      "fnr_ci_upper": 0.7043189619200981
    },
    "levenshtein_variance": {
      "auroc": 0.3968907178783722,
      "fnr_at_5fpr": 0.9259259259259259,
      "fnr_ci_lower": 0.8476712613403236,
      "fnr_ci_upper": 0.9656103695876189
    }
  },
  "acceptance_threshold": 0.55
}