model,first_attempt_avg,first_attempt_sem,last_attempt_avg,last_attempt_sem,rating_avg,rating_sem,last_over_first_avg,last_over_first_sem,very_satisfied_avg,very_satisfied_sem
claude-3-5-sonnet-20241022,2.5185185185185186,0.163389754517831,2.5454545454545454,0.11552305822622867,2.5441860465116277,0.062431050001633355,0.5945945945945946,0.05746373039227156,0.15348837209302327,0.024583005258904663
gemini-1.5-flash,2.59375,0.19499470112734332,2.4324324324324325,0.15759329445527842,2.5891891891891894,0.07446999931801224,0.45121951219512196,0.05529053052331481,0.20540540540540542,0.02970249100807162
gemini-2.0-flash-exp,2.5434782608695654,0.14144363257929807,2.230769230769231,0.13898843939735447,2.477272727272727,0.06740249228244831,0.4444444444444444,0.05267171812666414,0.17272727272727273,0.025485518346296473
gpt-4o,2.7142857142857144,0.1614840472172683,2.608695652173913,0.14405534654828034,2.647727272727273,0.06599859574192125,0.4,0.06377928041432807,0.17045454545454544,0.028344451255575834
meta-llama/Llama-3.3-70B-Instruct-Turbo,2.41025641025641,0.154631374460496,2.409090909090909,0.15011687419387526,2.483695652173913,0.07510804108407503,0.4246575342465753,0.05825274348024235,0.19021739130434784,0.02893346333942199
Overall,2.546511627906977,0.07250728776305919,2.4444444444444446,0.06261745176644863,2.5448979591836736,0.030882466224339558,0.47229551451187335,0.025677716724830307,0.17755102040816326,0.38213434229362747
