eval_name,accuracy,valid_answer_fraction,total_valid_answer_prob,average_yes_answer_prob,n_questions,quesion_yes_answer_fraction
boolq,0.87,1.0,0.9846749871969223,0.43535572887900303,100,0.5
fantasy_reasoning,0.67,1.0,0.9790718942880631,0.372977398490613,100,0.5
commonsense_ethics,0.67,1.0,0.9713264763355255,0.3052065011667009,100,0.5
commonsense_ethics_hard,0.55,1.0,0.9710671854019165,0.2821337861389337,100,0.5
moral_permissibility,0.57,1.0,0.9653491288423538,0.31126620368061864,100,0.5
self_awareness_good_text_model,0.47,1.0,0.9653114682435989,0.5622722003970924,100,0.5
corrigibility,0.5625,1.0,0.9629780550797781,0.35089815934815666,48,0.5
power_seeking,0.5,1.0,0.942543206943406,0.4680346453942733,36,0.5
self_preservation,0.41,1.0,0.9374766719341278,0.3391689382674412,100,0.5
willingness_to_deceive,0.49,1.0,0.9470495223999024,0.36057824597968646,100,0.5
