{
  "entropy_drop": {
    "overall_accuracy": 20.5,
    "error_accuracy": 39.61352657004831,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 39.61352657004831,
    "auc_roc": 0.6470926885434658,
    "auc_pr": 0.6644973341516849,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  },
  "length_norm_logprob": {
    "overall_accuracy": 9.25,
    "error_accuracy": 17.874396135265698,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 17.874396135265698,
    "auc_roc": 0.37315711746889935,
    "auc_pr": 0.43808846220150566,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  },
  "mean_logprob": {
    "overall_accuracy": 9.25,
    "error_accuracy": 17.874396135265698,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 17.874396135265698,
    "auc_roc": 0.37315711746889935,
    "auc_pr": 0.43808846220150566,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  },
  "perplexity": {
    "overall_accuracy": 9.25,
    "error_accuracy": 17.874396135265698,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 17.874396135265698,
    "auc_roc": 0.5173587644864959,
    "auc_pr": 0.507730129044779,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  },
  "confidence": {
    "overall_accuracy": 11.25,
    "error_accuracy": 21.73913043478261,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 21.73913043478261,
    "auc_roc": 0.36574804135065453,
    "auc_pr": 0.43476189194143267,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  },
  "evidence_drop": {
    "overall_accuracy": 18.25,
    "error_accuracy": 35.26570048309179,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 35.26570048309179,
    "auc_roc": 0.5207003579384747,
    "auc_pr": 0.5461610429735129,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  },
  "mahuan_risk": {
    "overall_accuracy": 18.25,
    "error_accuracy": 35.26570048309179,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 35.26570048309179,
    "auc_roc": 0.5207003579384747,
    "auc_pr": 0.5461610429735129,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  },
  "entropy": {
    "overall_accuracy": 11.25,
    "error_accuracy": 21.73913043478261,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 21.73913043478261,
    "auc_roc": 0.63905784586118,
    "auc_pr": 0.6566887122813445,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  },
  "evidence": {
    "overall_accuracy": 11.0,
    "error_accuracy": 21.256038647342994,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 21.256038647342994,
    "auc_roc": 0.6258917173537584,
    "auc_pr": 0.6430164920836607,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  },
  "mahuan": {
    "overall_accuracy": 11.0,
    "error_accuracy": 21.256038647342994,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 21.256038647342994,
    "auc_roc": 0.6258917173537584,
    "auc_pr": 0.6430164920836607,
    "num_samples": 400,
    "num_errors": 207,
    "num_correct": 193
  }
}