,agent,coefficient,intercept,bce_loss,average,p50,p50q0.025,p50q0.975,p80,p80q0.025,p80q0.975,1-4 min,4-16 min,16-64 min,64-256 min,256-960 min,release_date
0,Claude 3 Opus,-0.551748,1.480455,0.319064,0.377459,6.422825,2.81768,13.041035,1.125572,0.405046,2.790543,0.594355,0.496913,0.227102,0.065516,0.006847,2024-03-04
1,Claude 3.5 Sonnet (New),-0.524556,2.547853,0.396384,0.52701,28.983512,13.190571,57.392749,4.640773,1.740445,11.458448,0.984675,0.683975,0.491526,0.209935,0.109551,2024-10-22
2,Claude 3.5 Sonnet (Old),-0.552429,2.31313,0.36829,0.476797,18.21683,9.499069,33.947596,3.19928,1.404955,7.264588,0.833525,0.669465,0.388459,0.120377,0.06847,2024-06-20
3,Claude 3.7 Sonnet,-0.753789,4.342522,0.290499,0.599681,54.226342,27.543637,89.296096,15.156049,5.776293,31.162886,1.0,0.895341,0.675348,0.262348,0.029572,2025-02-24
4,GPT-2,-0.49278,-2.292107,0.1799,0.101641,0.039792,0.001922,0.128508,0.005662,0.000144,0.026312,0.0,0.0,0.0,0.0,0.0,2019-02-14
5,GPT-4 0125,-0.640663,1.55288,0.25533,0.351726,5.366063,2.692947,9.760427,1.197496,0.525443,2.527085,0.605048,0.484919,0.128868,0.015838,0.006847,2024-01-25
6,GPT-4 0314,-0.560241,1.357645,0.303443,0.361067,5.364045,2.477818,10.085986,0.965175,0.354285,2.204193,0.477863,0.504061,0.184479,0.050435,0.0,2023-03-14
7,GPT-4 1106,-0.541915,1.678409,0.34057,0.404329,8.557433,4.164257,16.372855,1.453006,0.554641,3.593736,0.731348,0.605484,0.248634,0.095197,0.006847,2023-11-06
8,GPT-4 Turbo,-0.657572,1.785511,0.258854,0.367357,6.567339,3.237662,12.297619,1.523202,0.696944,3.185356,0.654733,0.53735,0.141537,0.049776,0.0,2024-04-09
9,GPT-4o,-0.568075,1.816133,0.328378,0.407636,9.17045,4.223451,18.274087,1.689572,0.711923,3.867677,0.73569,0.614769,0.237548,0.131602,0.0,2024-05-13
10,davinci-002 (GPT-3),-0.653157,-1.794859,0.158918,0.162822,0.14886,0.066983,0.247186,0.034187,0.01201,0.06211,0.0,0.0,0.0,0.0,0.0,2020-05-28
11,gpt-3.5-turbo-instruct,-0.76923,-0.558577,0.095878,0.215725,0.604514,0.235909,0.986246,0.173339,0.056374,0.291756,0.039164,0.037333,0.0,0.0,0.0,2022-03-15
12,human,-0.385584,2.548212,0.470403,0.645483,97.592792,43.050816,230.949896,8.074619,2.493143,22.674618,0.821208,0.850731,0.652467,0.460536,0.268076,
13,o1,-0.510703,2.703164,0.408309,0.55932,39.206576,17.65317,81.927796,5.973358,1.916938,16.317346,1.0,0.681395,0.582488,0.285403,0.108776,2024-12-05
14,o1-preview,-0.611026,2.733654,0.343418,0.494422,22.221357,11.706468,40.248298,4.610993,2.016391,9.674241,0.81338,0.774656,0.40691,0.18538,0.028366,2024-09-12
15,o3,-0.571223,3.883376,0.477229,0.5782,111.303394,50.019951,227.364429,20.698658,4.35412,52.641534,1.0,0.885562,0.787233,0.341826,0.237153,2025-04-16
16,o4-mini,-0.591306,3.714905,0.47286,0.534356,77.845828,32.968394,153.347589,15.327857,3.243369,37.53082,1.0,0.862998,0.715064,0.308114,0.173803,2025-04-16
