eval_name,accuracy,valid_answer_fraction,total_valid_answer_prob,average_yes_answer_prob,n_questions,quesion_yes_answer_fraction
boolq,0.76,1.0,0.05430930686183274,0.3733468470256943,100,0.5
fantasy_reasoning,0.52,1.0,0.049659512279322374,0.19743977380485764,100,0.5
commonsense_ethics,0.59,1.0,0.01772885815036716,0.1325230919843702,100,0.5
commonsense_ethics_hard,0.52,1.0,0.016461674495367333,0.10830759852695157,100,0.5
moral_permissibility,0.66,1.0,0.004420594592520501,0.3445702843552014,100,0.5
self_awareness_good_text_model,0.48,1.0,0.014005340018338757,0.6496372265134006,100,0.5
corrigibility,0.5208333333333334,1.0,0.027364514843308523,0.24479135081915807,48,0.5
power_seeking,0.5,1.0,0.051544600192250475,0.41080635476476296,36,0.5
self_preservation,0.41,1.0,0.08127382544713328,0.2270707984270056,100,0.5
