model_baseline,model_variation,alternative_hypo,topic,p_value,effect_size,reject_null_95,reject_null_99,reject_null_999,effect_size_small,effect_size_middle,effect_size_large
gpt-4o,gpt-4o-far,gpt < gpt-4o-far,college_mathematics,0.34960955815361416,0.05055401662049863,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt < gpt-4o-pt,college_mathematics,0.574256879945578,0.023545706371191244,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt < gpt-4o-sft,college_mathematics,0.23002580080256857,0.09556786703601106,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-far,llama < llama-3.1-8b-far,college_mathematics,0.2911171306067889,0.07340720221606645,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama < llama-3.1-8b-pt,college_mathematics,0.3805039910832382,0.04085872576177285,False,False,False,False,False,False
gpt-4o,gpt-4o-far,gpt > gpt-4o-far,college_mathematics,0.654355877722127,0.05055401662049863,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt > gpt-4o-pt,college_mathematics,0.42994114603443506,0.023545706371191244,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt > gpt-4o-sft,college_mathematics,0.7732361270853065,0.09556786703601106,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-far,llama > llama-3.1-8b-far,college_mathematics,0.7124657774456948,0.07340720221606645,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama > llama-3.1-8b-pt,college_mathematics,0.623484616386277,0.04085872576177285,False,False,False,False,False,False
gpt-4o,gpt-4o-far,gpt < gpt-4o-far,conceptual_physics,0.5863671206092816,0.017715419501133756,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt < gpt-4o-pt,conceptual_physics,0.857439693161818,0.08900226757369611,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt < gpt-4o-sft,conceptual_physics,0.829137072705074,0.07893990929705219,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-far,llama < llama-3.1-8b-far,conceptual_physics,0.4896444496653486,0.0024092970521542023,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama < llama-3.1-8b-pt,conceptual_physics,0.7824853560193368,0.0683106575963719,False,False,False,False,False,False
gpt-4o,gpt-4o-far,gpt > gpt-4o-far,conceptual_physics,0.41498267243613285,0.017715419501133756,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt > gpt-4o-pt,conceptual_physics,0.1433275173270917,0.08900226757369611,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt > gpt-4o-sft,conceptual_physics,0.17172947129614896,0.07893990929705219,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-far,llama > llama-3.1-8b-far,conceptual_physics,0.5116496465771081,0.0024092970521542023,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama > llama-3.1-8b-pt,conceptual_physics,0.21846668613775228,0.0683106575963719,False,False,False,False,False,False
gpt-4o,gpt-4o-far,gpt < gpt-4o-far,elementary_mathematics,0.6219911410377765,0.022493074792243828,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt < gpt-4o-pt,elementary_mathematics,0.554139797478752,0.009750692520775539,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt < gpt-4o-sft,elementary_mathematics,0.6573231300494987,0.02947368421052632,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-far,llama < llama-3.1-8b-far,elementary_mathematics,0.49125271022389994,0.0018836565096952418,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama < llama-3.1-8b-pt,elementary_mathematics,0.4901392341350687,0.002105263157894721,False,False,False,False,False,False
gpt-4o,gpt-4o-far,gpt > gpt-4o-far,elementary_mathematics,0.3791674017044337,0.022493074792243828,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt > gpt-4o-pt,elementary_mathematics,0.44706958065770735,0.009750692520775539,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt > gpt-4o-sft,elementary_mathematics,0.3437929196488266,0.02947368421052632,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-far,llama > llama-3.1-8b-far,elementary_mathematics,0.509840491536679,0.0018836565096952418,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama > llama-3.1-8b-pt,elementary_mathematics,0.5109561448005342,0.002105263157894721,False,False,False,False,False,False
gpt-4o,gpt-4o-far,gpt < gpt-4o-far,high_school_mathematics,0.5613056195230477,0.01218836565096959,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt < gpt-4o-pt,high_school_mathematics,0.4910968966024769,0.0018836565096952418,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt < gpt-4o-sft,high_school_mathematics,0.5639079852365662,0.012742382271468067,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-far,llama < llama-3.1-8b-far,high_school_mathematics,0.47454557721231805,0.005429362880886468,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama < llama-3.1-8b-pt,high_school_mathematics,0.5733288717919385,0.015290858725761858,False,False,False,False,False,False
gpt-4o,gpt-4o-far,gpt > gpt-4o-far,high_school_mathematics,0.43979048354349876,0.01218836565096959,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt > gpt-4o-pt,high_school_mathematics,0.5100157704663246,0.0018836565096952418,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt > gpt-4o-sft,high_school_mathematics,0.43718464488539305,0.012742382271468067,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-far,llama > llama-3.1-8b-far,high_school_mathematics,0.5265134921135165,0.005429362880886468,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama > llama-3.1-8b-pt,high_school_mathematics,0.4277145117481465,0.015290858725761858,False,False,False,False,False,False
gpt-4o,gpt-4o-far,gpt < gpt-4o-far,moral_scenarios,0.6681106645345287,0.03132086167800452,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt < gpt-4o-pt,moral_scenarios,0.4465608913871876,0.00963718820861681,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt < gpt-4o-sft,moral_scenarios,0.9546082666632328,0.12684240362811794,False,False,False,True,False,False
llama-3.1-8b,llama-3.1-8b-far,llama < llama-3.1-8b-far,moral_scenarios,0.3132768941245006,0.041241496598639404,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama < llama-3.1-8b-pt,moral_scenarios,0.6760013179694992,0.03883219954648531,False,False,False,False,False,False
gpt-4o,gpt-4o-far,gpt > gpt-4o-far,moral_scenarios,0.33331203770028456,0.03132086167800452,False,False,False,False,False,False
gpt-4o,gpt-4o-pt,gpt > gpt-4o-pt,moral_scenarios,0.5550242991346307,0.00963718820861681,False,False,False,False,False,False
gpt-4o,gpt-4o-sft,gpt > gpt-4o-sft,moral_scenarios,0.045753226421606896,0.12684240362811794,True,False,False,True,False,False
llama-3.1-8b,llama-3.1-8b-far,llama > llama-3.1-8b-far,moral_scenarios,0.6879114201280869,0.041241496598639404,False,False,False,False,False,False
llama-3.1-8b,llama-3.1-8b-pt,llama > llama-3.1-8b-pt,moral_scenarios,0.3251931085547158,0.03883219954648531,False,False,False,False,False,False
