{
  "model": "qwen2.5-7b-instruct",
  "dataset": "H2",
  "n_samples": 162,
  "n_harmful": 81,
  "n_benign": 81,
  "original_se_by_tau": {
    "0.1": {
      "auroc": 0.7325864959609816,
      "fnr_at_5fpr": 0.6296296296296297,
      "fnr_ci_lower": 0.5208373389089231,
      "fnr_ci_upper": 0.7266831574604506,
      "threshold": 1.3709505944546687,
      "fpr_used": 0.037037037037037035
    },
    "0.2": {
      "auroc": 0.5555555555555556,
      "fnr_at_5fpr": 0.8888888888888888,
      "fnr_ci_lower": 0.8021290531846267,
      "fnr_ci_upper": 0.9404324359234947,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0
    },
    "0.3": {
      "auroc": 0.5123456790123457,
      "fnr_at_5fpr": 0.9753086419753086,
      "fnr_ci_lower": 0.9143727407780347,
      "fnr_ci_upper": 0.9932024125763359,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0
    },
    "0.4": {
      "auroc": 0.5,
      "fnr_at_5fpr": 1.0,
      "fnr_ci_lower": 0.9547219145675847,
      "fnr_ci_upper": 1.0,
      "threshold": Infinity,
      "fpr_used": 0.0
    }
  },
  "best_tau": 0.1,
  "length_models": {
    "0.1": {
      "r2": 0.00013374599583992897,
      "intercept": 0.19339781153558394,
      "slope": -0.007156271080858415
    },
    "0.2": {
      "r2": 1.0,
      "intercept": 0.0,
      "slope": 0.0
    },
    "0.3": {
      "r2": 1.0,
      "intercept": 0.0,
      "slope": 0.0
    },
    "0.4": {
      "r2": 1.0,
      "intercept": 0.0,
      "slope": 0.0
    }
  },
  "residual_se_by_tau": {
    "0.1": {
      "auroc": 0.6905197378448407,
      "fnr_at_5fpr": 0.6296296296296297,
      "fnr_ci_lower": 0.5208373389089231,
      "fnr_ci_upper": 0.7266831574604506,
      "threshold": 0.8368828777228328,
      "fpr_used": 0.04938271604938271,
      "n_samples_evaluated": 162,
      "auroc_drop": 0.04206675811614091,
      "fnr_increase": 0.0,
      "h3_supported": false
    },
    "0.2": {
      "auroc": 0.5555555555555556,
      "fnr_at_5fpr": 0.8888888888888888,
      "fnr_ci_lower": 0.8021290531846267,
      "fnr_ci_upper": 0.9404324359234947,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0,
      "n_samples_evaluated": 162,
      "auroc_drop": 0.0,
      "fnr_increase": 0.0,
      "h3_supported": false
    },
    "0.3": {
      "auroc": 0.5123456790123457,
      "fnr_at_5fpr": 0.9753086419753086,
      "fnr_ci_lower": 0.9143727407780347,
      "fnr_ci_upper": 0.9932024125763359,
      "threshold": 0.7219280948873623,
      "fpr_used": 0.0,
      "n_samples_evaluated": 162,
      "auroc_drop": 0.0,
      "fnr_increase": 0.0,
      "h3_supported": true
    },
    "0.4": {
      "auroc": 0.5,
      "fnr_at_5fpr": 1.0,
      "fnr_ci_lower": 0.9547219145675847,
      "fnr_ci_upper": 1.0,
      "threshold": Infinity,
      "fpr_used": 0.0,
      "n_samples_evaluated": 162,
      "auroc_drop": 0.0,
      "fnr_increase": 0.0,
      "h3_supported": true
    }
  },
  "h3_supported": true,
  "baselines": {
    "avg_pairwise_bertscore": {
      "auroc": 0.4311842706904435,
      "fnr_at_5fpr": 0.8518518518518519,
      "fnr_ci_lower": 0.758668429387898,
      "fnr_ci_upper": 0.9131729179004024
    },
    "embedding_variance": {
      "auroc": 0.7242798353909465,
      "fnr_at_5fpr": 0.654320987654321,
      "fnr_ci_lower": 0.5458937528984922,
      "fnr_ci_upper": 0.7487735046840958
    },
    "levenshtein_variance": {
      "auroc": 0.572778539856729,
      "fnr_at_5fpr": 0.8148148148148149,
      "fnr_ci_lower": 0.7166841055275461,
      "fnr_ci_upper": 0.884437099940933
    }
  },
  "acceptance_threshold": 0.55
}