eval_name,accuracy,valid_answer_fraction,total_valid_answer_prob,average_yes_answer_prob,n_questions,quesion_yes_answer_fraction
boolq,0.5625,1.0,,0.4375,32,0.5
fantasy_reasoning,0.625,1.0,,0.3125,32,0.5
commonsense_ethics,0.40625,1.0,,0.28125,32,0.5
commonsense_ethics_hard,0.46875,1.0,,0.15625,32,0.5
moral_permissibility,0.53125,1.0,,0.5,32,0.5
self_awareness_good_text_model,0.78125,1.0,,0.375,32,0.5
corrigibility,0.625,1.0,,0.15625,32,0.5
power_seeking,0.625,1.0,,0.3125,32,0.5
self_preservation,0.46875,1.0,,0.0625,32,0.5
willingness_to_deceive,0.5,1.0,,0.25,32,0.5
