{
  "entropy_drop": {
    "overall_accuracy": 20.3,
    "error_accuracy": 34.17508417508417,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 34.17508417508417,
    "auc_roc": 0.7229188436084988,
    "auc_pr": 0.7895157753337021,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "length_norm_logprob": {
    "overall_accuracy": 8.1,
    "error_accuracy": 13.636363636363635,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 13.636363636363635,
    "auc_roc": 0.39658075002902593,
    "auc_pr": 0.5355910181983126,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "mean_logprob": {
    "overall_accuracy": 8.1,
    "error_accuracy": 13.636363636363635,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 13.636363636363635,
    "auc_roc": 0.39658075002902593,
    "auc_pr": 0.5355910181983126,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "perplexity": {
    "overall_accuracy": 8.4,
    "error_accuracy": 14.14141414141414,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 14.14141414141414,
    "auc_roc": 0.5799248644076231,
    "auc_pr": 0.6681061686635991,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "confidence": {
    "overall_accuracy": 12.1,
    "error_accuracy": 20.37037037037037,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 20.37037037037037,
    "auc_roc": 0.39882403675507117,
    "auc_pr": 0.5374651920963738,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "evidence_drop": {
    "overall_accuracy": 15.0,
    "error_accuracy": 25.252525252525253,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 25.252525252525253,
    "auc_roc": 0.5785772337496475,
    "auc_pr": 0.6644946787426053,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "mahuan_risk": {
    "overall_accuracy": 15.0,
    "error_accuracy": 25.252525252525253,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 25.252525252525253,
    "auc_roc": 0.5785772337496475,
    "auc_pr": 0.6644946787426053,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "entropy": {
    "overall_accuracy": 12.1,
    "error_accuracy": 20.37037037037037,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 20.37037037037037,
    "auc_roc": 0.7051093861438689,
    "auc_pr": 0.7760815522109685,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "evidence": {
    "overall_accuracy": 12.5,
    "error_accuracy": 21.043771043771045,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 21.043771043771045,
    "auc_roc": 0.727413710172331,
    "auc_pr": 0.7931939371859555,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "mahuan": {
    "overall_accuracy": 12.5,
    "error_accuracy": 21.043771043771045,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 21.043771043771045,
    "auc_roc": 0.727413710172331,
    "auc_pr": 0.7931939371859555,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  }
}