{
  "entropy_drop": {
    "overall_accuracy": 20.8,
    "error_accuracy": 35.01683501683502,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 35.01683501683502,
    "auc_roc": 0.7330530261564744,
    "auc_pr": 0.802102636336649,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "length_norm_logprob": {
    "overall_accuracy": 8.6,
    "error_accuracy": 14.47811447811448,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 14.47811447811448,
    "auc_roc": 0.38467184156839324,
    "auc_pr": 0.525589860951734,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "mean_logprob": {
    "overall_accuracy": 8.6,
    "error_accuracy": 14.47811447811448,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 14.47811447811448,
    "auc_roc": 0.38467184156839324,
    "auc_pr": 0.525589860951734,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "perplexity": {
    "overall_accuracy": 8.799999999999999,
    "error_accuracy": 14.814814814814813,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 14.814814814814813,
    "auc_roc": 0.5985097278200726,
    "auc_pr": 0.6687157571872506,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "confidence": {
    "overall_accuracy": 12.4,
    "error_accuracy": 20.875420875420875,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 20.875420875420875,
    "auc_roc": 0.39655172413793105,
    "auc_pr": 0.5357506723050809,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "evidence_drop": {
    "overall_accuracy": 15.2,
    "error_accuracy": 25.589225589225588,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 25.589225589225588,
    "auc_roc": 0.5826491516146688,
    "auc_pr": 0.6707273089503005,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "mahuan_risk": {
    "overall_accuracy": 15.2,
    "error_accuracy": 25.589225589225588,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 25.589225589225588,
    "auc_roc": 0.5826491516146688,
    "auc_pr": 0.6707273089503005,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "entropy": {
    "overall_accuracy": 12.4,
    "error_accuracy": 20.875420875420875,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 20.875420875420875,
    "auc_roc": 0.7005606143537179,
    "auc_pr": 0.7782770066015199,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "evidence": {
    "overall_accuracy": 12.4,
    "error_accuracy": 20.875420875420875,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 20.875420875420875,
    "auc_roc": 0.7352880197707783,
    "auc_pr": 0.8011520898871147,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "mahuan": {
    "overall_accuracy": 12.4,
    "error_accuracy": 20.875420875420875,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 20.875420875420875,
    "auc_roc": 0.7352880197707783,
    "auc_pr": 0.8011520898871147,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  }
}