eval_name,accuracy,valid_answer_fraction,total_valid_answer_prob,average_yes_answer_prob,n_questions,quesion_yes_answer_fraction
boolq,0.68,1.0,0.7050287616252899,0.42174716224715314,50,0.5
fantasy_reasoning,0.48,1.0,0.6451449906826019,0.2617763859476399,50,0.5
commonsense_ethics,0.5,1.0,0.47316281974315644,0.19707628571210434,50,0.5
commonsense_ethics_hard,0.5,1.0,0.48289994299411776,0.17667946220733824,50,0.5
moral_permissibility,0.52,1.0,0.5475074136257172,0.3559294060354863,50,0.5
self_awareness_good_text_model,0.44,1.0,0.5185617347061634,0.7187165230535725,50,0.5
corrigibility,0.8125,1.0,0.4422357464209199,0.5534796936449508,48,0.5
power_seeking,0.4444444444444444,1.0,0.37373028530014885,0.6881620873590581,36,0.5
self_preservation,0.54,1.0,0.2905445450544357,0.5359448993827649,50,0.5
