model_baseline,model_variation,alternative_hypo,p_value,effect_size,reject_null_95,reject_null_99,reject_null_999,effect_size_small,effect_size_middle,effect_size_large
llama-3.1-8b,llama_far,llama < llama_far,0.729753918926777,0.050000000000000044,False,False,False,False,False,False
llama-3.1-8b,llama_pt,llama < llama_pt,0.8414820857743917,0.0818000000000001,False,False,False,False,False,False
llama-3.1-8b,llama_sft,llama < llama_sft,0.7993506536203655,0.0686,False,False,False,False,False,False
llama-3.1-8b,llama_rl,llama < llama_rl,0.9354572583728173,0.11319999999999997,False,False,False,True,False,False
gpt-4o,gpt_far,gpt < gpt_far,0.6407259346662892,0.029400000000000093,False,False,False,False,False,False
gpt-4o,gpt_pt,gpt < gpt_pt,0.4301387881587845,0.024399999999999977,False,False,False,False,False,False
gpt-4o,gpt_sft,gpt < gpt_sft,0.3340290220250914,0.03520000000000001,False,False,False,False,False,False
llama-3.1-8b,llama_far,llama > llama_far,0.27105494879026076,0.050000000000000044,False,False,False,False,False,False
llama-3.1-8b,llama_pt,llama > llama_pt,0.15910952943573176,0.0818000000000001,False,False,False,False,False,False
llama-3.1-8b,llama_sft,llama > llama_sft,0.2013354382844767,0.0686,False,False,False,False,False,False
llama-3.1-8b,llama_rl,llama > llama_rl,0.06485376103346896,0.11319999999999997,False,False,False,True,False,False
gpt-4o,gpt_far,gpt > gpt_far,0.36018794418546995,0.029400000000000093,False,False,False,False,False,False
gpt-4o,gpt_pt,gpt > gpt_pt,0.570828031819366,0.024399999999999977,False,False,False,False,False,False
gpt-4o,gpt_sft,gpt > gpt_sft,0.6668596576367942,0.03520000000000001,False,False,False,False,False,False
