{
  "entropy_drop": {
    "overall_accuracy": 19.0,
    "error_accuracy": 31.986531986531986,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 31.986531986531986,
    "auc_roc": 0.6898666467631985,
    "auc_pr": 0.7598427380243296,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "length_norm_logprob": {
    "overall_accuracy": 11.200000000000001,
    "error_accuracy": 18.855218855218855,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 18.855218855218855,
    "auc_roc": 0.39791179446351865,
    "auc_pr": 0.5438668909283416,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "mean_logprob": {
    "overall_accuracy": 11.200000000000001,
    "error_accuracy": 18.855218855218855,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 18.855218855218855,
    "auc_roc": 0.39791179446351865,
    "auc_pr": 0.5438668909283416,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "perplexity": {
    "overall_accuracy": 11.1,
    "error_accuracy": 18.68686868686869,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 18.68686868686869,
    "auc_roc": 0.5440156905674147,
    "auc_pr": 0.6129222571241417,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "confidence": {
    "overall_accuracy": 13.5,
    "error_accuracy": 22.727272727272727,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 22.727272727272727,
    "auc_roc": 0.41870262559917737,
    "auc_pr": 0.5534479932752212,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "evidence_drop": {
    "overall_accuracy": 17.4,
    "error_accuracy": 29.292929292929294,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 29.292929292929294,
    "auc_roc": 0.5685508616543099,
    "auc_pr": 0.657388801226625,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "mahuan_risk": {
    "overall_accuracy": 17.4,
    "error_accuracy": 29.292929292929294,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 29.292929292929294,
    "auc_roc": 0.5685508616543099,
    "auc_pr": 0.657388801226625,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "entropy": {
    "overall_accuracy": 13.5,
    "error_accuracy": 22.727272727272727,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 22.727272727272727,
    "auc_roc": 0.6221658290623808,
    "auc_pr": 0.6970636289970615,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "evidence": {
    "overall_accuracy": 13.3,
    "error_accuracy": 22.39057239057239,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 22.39057239057239,
    "auc_roc": 0.6443001443001443,
    "auc_pr": 0.715399054307973,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  },
  "mahuan": {
    "overall_accuracy": 13.3,
    "error_accuracy": 22.39057239057239,
    "correct_accuracy": 0.0,
    "f1_score": 0.0,
    "error_position_accuracy": 22.39057239057239,
    "auc_roc": 0.6443001443001443,
    "auc_pr": 0.715399054307973,
    "num_samples": 1000,
    "num_errors": 594,
    "num_correct": 406
  }
}