winner,loser,cohens_d,win_rate,bp_ratio,viol_magnitude,viol_consistency,viol_stability,fragile
Claude 3.5 Sonnet (20241022),DeepSeek v3,0.01765315800347073,0.45614035087719296,0.017543859649122806,True,True,True,True
Claude 3.5 Sonnet (20241022),Gemini 1.5 Pro (002),0.09240996297748295,0.5964912280701754,0.05263157894736842,True,True,True,True
Claude 3.5 Sonnet (20241022),Claude 3.5 Sonnet (20240620),0.27174532156101244,0.543859649122807,0.10526315789473684,False,True,True,True
Claude 3.5 Sonnet (20241022),Claude 3 Opus (20240229),0.7562873232233703,0.7543859649122807,0.5789473684210527,False,False,False,False
Claude 3.5 Sonnet (20241022),Llama 3.1 Instruct Turbo (405B),0.8901413397711434,0.7719298245614035,0.631578947368421,False,False,False,False
Claude 3.5 Sonnet (20241022),GPT-4o (2024-08-06),0.5844624042214005,0.7368421052631579,0.40350877192982454,False,False,False,False
Claude 3.5 Sonnet (20241022),GPT-4o (2024-05-13),0.6090051293762383,0.6491228070175439,0.42105263157894735,False,False,False,False
Claude 3.5 Sonnet (20241022),Qwen2.5 Instruct Turbo (72B),0.9402822247606352,0.8596491228070176,0.7543859649122807,False,False,False,False
Claude 3.5 Sonnet (20241022),Gemini 1.5 Pro (001),0.5243668037183623,0.8421052631578947,0.543859649122807,False,False,False,False
Claude 3.5 Sonnet (20241022),GPT-4 (0613),1.0020649246207252,0.8947368421052632,0.7719298245614035,False,False,False,False
Claude 3.5 Sonnet (20241022),Qwen2 Instruct (72B),1.1033966262077182,0.9298245614035088,0.8771929824561403,False,False,False,False
Claude 3.5 Sonnet (20241022),Amazon Nova Pro,1.0355596987663889,0.9298245614035088,0.8947368421052632,False,False,False,False
Claude 3.5 Sonnet (20241022),Palmyra-X-004,1.2884650802532418,0.9649122807017544,0.9473684210526315,False,False,False,False
Claude 3.5 Sonnet (20241022),GPT-4 Turbo (2024-04-09),1.0118188509851211,0.9473684210526315,0.8771929824561403,False,False,False,False
Claude 3.5 Sonnet (20241022),Gemini 1.5 Pro (0409 preview),0.7891528212083839,0.9298245614035088,0.8596491228070176,False,False,False,False
Claude 3.5 Sonnet (20241022),Llama 3.2 Vision Instruct Turbo (90B),1.100293532713525,0.9649122807017544,0.9649122807017544,False,False,False,False
Claude 3.5 Sonnet (20241022),Llama 3.1 Instruct Turbo (70B),1.171638514432218,0.9649122807017544,0.9649122807017544,False,False,False,False
Claude 3.5 Sonnet (20241022),Mistral Large 2 (2407),1.14998733389456,0.9649122807017544,0.9473684210526315,False,False,False,False
Claude 3.5 Sonnet (20241022),Gemini 2.0 Flash (Experimental),1.1060762051269715,0.9473684210526315,0.9473684210526315,False,False,False,False
DeepSeek v3,Gemini 1.5 Pro (002),0.12137643214726841,0.5087719298245614,0.07017543859649122,True,True,True,True
DeepSeek v3,Claude 3.5 Sonnet (20240620),0.13976370615900505,0.47368421052631576,0.05263157894736842,True,True,True,True
DeepSeek v3,Claude 3 Opus (20240229),0.4995884129881667,0.631578947368421,0.3333333333333333,False,False,False,False
DeepSeek v3,Llama 3.1 Instruct Turbo (405B),0.6352925922896683,0.6842105263157895,0.40350877192982454,False,False,False,False
DeepSeek v3,GPT-4o (2024-08-06),0.423890864073919,0.6842105263157895,0.2807017543859649,False,False,False,False
DeepSeek v3,GPT-4o (2024-05-13),0.45089139142128476,0.5964912280701754,0.3157894736842105,False,True,False,True
DeepSeek v3,Qwen2.5 Instruct Turbo (72B),0.8822631928006002,0.8596491228070176,0.7192982456140351,False,False,False,False
DeepSeek v3,Gemini 1.5 Pro (001),0.5720503372199236,0.8421052631578947,0.631578947368421,False,False,False,False
DeepSeek v3,GPT-4 (0613),0.7064544619434395,0.7017543859649122,0.49122807017543857,False,False,False,False
DeepSeek v3,Qwen2 Instruct (72B),0.8888954800504208,0.8245614035087719,0.7017543859649122,False,False,False,False
DeepSeek v3,Amazon Nova Pro,1.0931464404154931,0.9122807017543859,0.8070175438596491,False,False,False,False
DeepSeek v3,Palmyra-X-004,1.0821479894670962,0.8771929824561403,0.8245614035087719,False,False,False,False
DeepSeek v3,GPT-4 Turbo (2024-04-09),0.8137890734201239,0.9122807017543859,0.7368421052631579,False,False,False,False
DeepSeek v3,Gemini 1.5 Pro (0409 preview),0.7831518597366758,0.9298245614035088,0.8596491228070176,False,False,False,False
DeepSeek v3,Llama 3.2 Vision Instruct Turbo (90B),0.8362868650373121,0.8771929824561403,0.7719298245614035,False,False,False,False
DeepSeek v3,Llama 3.1 Instruct Turbo (70B),0.8767809312085142,0.8947368421052632,0.7894736842105263,False,False,False,False
DeepSeek v3,Mistral Large 2 (2407),0.927462079186219,0.9298245614035088,0.8070175438596491,False,False,False,False
DeepSeek v3,Gemini 2.0 Flash (Experimental),1.0982754849574774,0.9298245614035088,0.9122807017543859,False,False,False,False
Gemini 1.5 Pro (002),Claude 3.5 Sonnet (20240620),0.06016327261019559,0.40350877192982454,0.017543859649122806,True,True,True,True
Gemini 1.5 Pro (002),Claude 3 Opus (20240229),0.38435374114924326,0.5614035087719298,0.19298245614035087,False,True,True,True
Gemini 1.5 Pro (002),Llama 3.1 Instruct Turbo (405B),0.5161229267066924,0.631578947368421,0.3157894736842105,False,False,False,False
Gemini 1.5 Pro (002),GPT-4o (2024-08-06),0.34698688029851943,0.5087719298245614,0.15789473684210525,False,True,True,True
Gemini 1.5 Pro (002),GPT-4o (2024-05-13),0.3665535024219018,0.5263157894736842,0.17543859649122806,False,True,True,True
Gemini 1.5 Pro (002),Qwen2.5 Instruct Turbo (72B),0.6762125529112046,0.7543859649122807,0.5789473684210527,False,False,False,False
Gemini 1.5 Pro (002),Gemini 1.5 Pro (001),0.5662918186116752,0.8596491228070176,0.6491228070175439,False,False,False,False
Gemini 1.5 Pro (002),GPT-4 (0613),0.60204582769565,0.6666666666666666,0.3684210526315789,False,False,False,False
Gemini 1.5 Pro (002),Qwen2 Instruct (72B),0.7204247391094754,0.8245614035087719,0.631578947368421,False,False,False,False
Gemini 1.5 Pro (002),Amazon Nova Pro,0.8561863688349183,0.9122807017543859,0.7894736842105263,False,False,False,False
Gemini 1.5 Pro (002),Palmyra-X-004,0.9211353870034634,0.9298245614035088,0.7719298245614035,False,False,False,False
Gemini 1.5 Pro (002),GPT-4 Turbo (2024-04-09),0.6914531403291008,0.7719298245614035,0.5614035087719298,False,False,False,False
Gemini 1.5 Pro (002),Gemini 1.5 Pro (0409 preview),0.7844188571962766,0.9298245614035088,0.8245614035087719,False,False,False,False
Gemini 1.5 Pro (002),Llama 3.2 Vision Instruct Turbo (90B),0.7471332081705083,0.8421052631578947,0.6140350877192983,False,False,False,False
Gemini 1.5 Pro (002),Llama 3.1 Instruct Turbo (70B),0.7912248375804395,0.8421052631578947,0.6842105263157895,False,False,False,False
Gemini 1.5 Pro (002),Mistral Large 2 (2407),0.8120960463625843,0.8947368421052632,0.7368421052631579,False,False,False,False
Gemini 1.5 Pro (002),Gemini 2.0 Flash (Experimental),1.0345370743844269,0.8596491228070176,0.7719298245614035,False,False,False,False
Claude 3.5 Sonnet (20240620),Claude 3 Opus (20240229),0.8369591186910126,0.7368421052631579,0.543859649122807,False,False,False,False
Claude 3.5 Sonnet (20240620),Llama 3.1 Instruct Turbo (405B),0.7000591181585556,0.7719298245614035,0.47368421052631576,False,False,False,False
Claude 3.5 Sonnet (20240620),GPT-4o (2024-08-06),0.5251326909645624,0.7192982456140351,0.3684210526315789,False,False,False,False
Claude 3.5 Sonnet (20240620),GPT-4o (2024-05-13),0.5889807935467299,0.7017543859649122,0.40350877192982454,False,False,False,False
Claude 3.5 Sonnet (20240620),Qwen2.5 Instruct Turbo (72B),0.8987345825409725,0.8070175438596491,0.6140350877192983,False,False,False,False
Claude 3.5 Sonnet (20240620),Gemini 1.5 Pro (001),0.428344978768323,0.7894736842105263,0.38596491228070173,False,False,False,False
Claude 3.5 Sonnet (20240620),GPT-4 (0613),1.0758374427422939,0.8771929824561403,0.7543859649122807,False,False,False,False
Claude 3.5 Sonnet (20240620),Qwen2 Instruct (72B),1.2139913006232668,0.9122807017543859,0.8245614035087719,False,False,False,False
Claude 3.5 Sonnet (20240620),Amazon Nova Pro,1.0782611144241823,0.9122807017543859,0.8070175438596491,False,False,False,False
Claude 3.5 Sonnet (20240620),Palmyra-X-004,1.4870300496060678,0.9473684210526315,0.9298245614035088,False,False,False,False
Claude 3.5 Sonnet (20240620),GPT-4 Turbo (2024-04-09),1.0633896462520163,0.8596491228070176,0.7894736842105263,False,False,False,False
Claude 3.5 Sonnet (20240620),Gemini 1.5 Pro (0409 preview),0.701176046835158,0.9122807017543859,0.7368421052631579,False,False,False,False
Claude 3.5 Sonnet (20240620),Llama 3.2 Vision Instruct Turbo (90B),1.162691585804038,0.9649122807017544,0.9473684210526315,False,False,False,False
Claude 3.5 Sonnet (20240620),Llama 3.1 Instruct Turbo (70B),1.2147678466740932,0.9298245614035088,0.9122807017543859,False,False,False,False
Claude 3.5 Sonnet (20240620),Mistral Large 2 (2407),1.2126093616948126,0.9649122807017544,0.9473684210526315,False,False,False,False
Claude 3.5 Sonnet (20240620),Gemini 2.0 Flash (Experimental),0.9727197038097868,0.9122807017543859,0.7894736842105263,False,False,False,False
Claude 3 Opus (20240229),Llama 3.1 Instruct Turbo (405B),0.012013945165253847,0.47368421052631576,0.017543859649122806,True,True,True,True
Claude 3 Opus (20240229),GPT-4o (2024-08-06),0.06656564307938456,0.45614035087719296,0.03508771929824561,True,True,True,True
Claude 3 Opus (20240229),GPT-4o (2024-05-13),0.11081344625420594,0.38596491228070173,0.05263157894736842,True,True,True,True
Claude 3 Opus (20240229),Qwen2.5 Instruct Turbo (72B),0.34002518144487,0.5964912280701754,0.22807017543859648,False,True,False,True
Claude 3 Opus (20240229),Gemini 1.5 Pro (001),0.2084528035587789,0.631578947368421,0.14035087719298245,False,False,True,True
Claude 3 Opus (20240229),GPT-4 (0613),0.602286017610001,0.6491228070175439,0.3684210526315789,False,False,False,False
Claude 3 Opus (20240229),Qwen2 Instruct (72B),0.7358725851765525,0.7719298245614035,0.49122807017543857,False,False,False,False
Claude 3 Opus (20240229),Amazon Nova Pro,0.6811281424536384,0.7543859649122807,0.47368421052631576,False,False,False,False
Claude 3 Opus (20240229),Palmyra-X-004,0.8322095743262985,0.8245614035087719,0.6140350877192983,False,False,False,False
Claude 3 Opus (20240229),GPT-4 Turbo (2024-04-09),0.7676350417107766,0.7368421052631579,0.5263157894736842,False,False,False,False
Claude 3 Opus (20240229),Gemini 1.5 Pro (0409 preview),0.48193180925590823,0.7543859649122807,0.49122807017543857,False,False,False,False
Claude 3 Opus (20240229),Llama 3.2 Vision Instruct Turbo (90B),0.9182026268567567,0.7719298245614035,0.6842105263157895,False,False,False,False
Claude 3 Opus (20240229),Llama 3.1 Instruct Turbo (70B),0.9586205577604194,0.8245614035087719,0.7192982456140351,False,False,False,False
Claude 3 Opus (20240229),Mistral Large 2 (2407),0.8798439243706243,0.8771929824561403,0.6491228070175439,False,False,False,False
Claude 3 Opus (20240229),Gemini 2.0 Flash (Experimental),0.6590827286418549,0.7368421052631579,0.45614035087719296,False,False,False,False
Llama 3.1 Instruct Turbo (405B),GPT-4o (2024-08-06),0.04134275910638255,0.40350877192982454,0.017543859649122806,True,True,True,True
Llama 3.1 Instruct Turbo (405B),GPT-4o (2024-05-13),0.07729536051684946,0.42105263157894735,0.03508771929824561,True,True,True,True
Llama 3.1 Instruct Turbo (405B),Qwen2.5 Instruct Turbo (72B),0.38108074913910384,0.631578947368421,0.24561403508771928,False,False,False,False
Llama 3.1 Instruct Turbo (405B),Gemini 1.5 Pro (001),0.23239742532141464,0.631578947368421,0.12280701754385964,False,False,True,True
Llama 3.1 Instruct Turbo (405B),GPT-4 (0613),0.5127882356360116,0.631578947368421,0.2982456140350877,False,False,False,False
Llama 3.1 Instruct Turbo (405B),Qwen2 Instruct (72B),0.6075951632289573,0.7719298245614035,0.42105263157894735,False,False,False,False
Llama 3.1 Instruct Turbo (405B),Amazon Nova Pro,0.6229081507013476,0.7543859649122807,0.47368421052631576,False,False,False,False
Llama 3.1 Instruct Turbo (405B),Palmyra-X-004,0.826454845719407,0.7543859649122807,0.543859649122807,False,False,False,False
Llama 3.1 Instruct Turbo (405B),GPT-4 Turbo (2024-04-09),0.682747448498194,0.6842105263157895,0.42105263157894735,False,False,False,False
Llama 3.1 Instruct Turbo (405B),Gemini 1.5 Pro (0409 preview),0.5062358989144065,0.8070175438596491,0.5087719298245614,False,False,False,False
Llama 3.1 Instruct Turbo (405B),Llama 3.2 Vision Instruct Turbo (90B),0.820009279064582,0.8245614035087719,0.6842105263157895,False,False,False,False
Llama 3.1 Instruct Turbo (405B),Llama 3.1 Instruct Turbo (70B),0.8801290443041251,0.8596491228070176,0.7368421052631579,False,False,False,False
Llama 3.1 Instruct Turbo (405B),Mistral Large 2 (2407),0.7867861724329035,0.8421052631578947,0.6666666666666666,False,False,False,False
Llama 3.1 Instruct Turbo (405B),Gemini 2.0 Flash (Experimental),0.6877772568514223,0.6842105263157895,0.49122807017543857,False,False,False,False
GPT-4o (2024-08-06),GPT-4o (2024-05-13),0.08148488272128754,0.47368421052631576,0.05263157894736842,True,True,True,True
GPT-4o (2024-08-06),Qwen2.5 Instruct Turbo (72B),0.19588466035661056,0.631578947368421,0.14035087719298245,True,False,True,True
GPT-4o (2024-08-06),Gemini 1.5 Pro (001),0.1628394734681736,0.6842105263157895,0.08771929824561403,True,False,True,True
GPT-4o (2024-08-06),GPT-4 (0613),0.45501122857455867,0.7017543859649122,0.2807017543859649,False,False,False,False
GPT-4o (2024-08-06),Qwen2 Instruct (72B),0.47815102976269763,0.7719298245614035,0.3157894736842105,False,False,False,False
GPT-4o (2024-08-06),Amazon Nova Pro,0.4322386536084213,0.7017543859649122,0.2982456140350877,False,False,False,False
GPT-4o (2024-08-06),Palmyra-X-004,0.598391500018644,0.8070175438596491,0.45614035087719296,False,False,False,False
GPT-4o (2024-08-06),GPT-4 Turbo (2024-04-09),0.79829422685478,0.7894736842105263,0.5614035087719298,False,False,False,False
GPT-4o (2024-08-06),Gemini 1.5 Pro (0409 preview),0.40595148245729906,0.8245614035087719,0.3508771929824561,False,False,False,False
GPT-4o (2024-08-06),Llama 3.2 Vision Instruct Turbo (90B),0.9988170157772619,0.8421052631578947,0.6666666666666666,False,False,False,False
GPT-4o (2024-08-06),Llama 3.1 Instruct Turbo (70B),1.0538475840849406,0.8596491228070176,0.7368421052631579,False,False,False,False
GPT-4o (2024-08-06),Mistral Large 2 (2407),0.7915043927065637,0.8245614035087719,0.543859649122807,False,False,False,False
GPT-4o (2024-08-06),Gemini 2.0 Flash (Experimental),0.5649231383105806,0.7719298245614035,0.3684210526315789,False,False,False,False
GPT-4o (2024-05-13),Qwen2.5 Instruct Turbo (72B),0.16597523724074537,0.6491228070175439,0.12280701754385964,True,False,True,True
GPT-4o (2024-05-13),Gemini 1.5 Pro (001),0.14566316363708554,0.6666666666666666,0.07017543859649122,True,False,True,True
GPT-4o (2024-05-13),GPT-4 (0613),0.45664809524657773,0.7368421052631579,0.2807017543859649,False,False,False,False
GPT-4o (2024-05-13),Qwen2 Instruct (72B),0.4870215554287842,0.7719298245614035,0.3684210526315789,False,False,False,False
GPT-4o (2024-05-13),Amazon Nova Pro,0.44622927062879264,0.8070175438596491,0.3333333333333333,False,False,False,False
GPT-4o (2024-05-13),Palmyra-X-004,0.6499022372126002,0.8596491228070176,0.49122807017543857,False,False,False,False
GPT-4o (2024-05-13),GPT-4 Turbo (2024-04-09),0.7735298103012732,0.8421052631578947,0.5614035087719298,False,False,False,False
GPT-4o (2024-05-13),Gemini 1.5 Pro (0409 preview),0.3755783609878089,0.7543859649122807,0.3333333333333333,False,False,False,False
GPT-4o (2024-05-13),Llama 3.2 Vision Instruct Turbo (90B),1.0236256111611803,0.8421052631578947,0.7192982456140351,False,False,False,False
GPT-4o (2024-05-13),Llama 3.1 Instruct Turbo (70B),1.1175083885846777,0.8947368421052632,0.8070175438596491,False,False,False,False
GPT-4o (2024-05-13),Mistral Large 2 (2407),0.8220882394626104,0.8596491228070176,0.5964912280701754,False,False,False,False
GPT-4o (2024-05-13),Gemini 2.0 Flash (Experimental),0.5457523710982932,0.7719298245614035,0.3508771929824561,False,False,False,False
Qwen2.5 Instruct Turbo (72B),Gemini 1.5 Pro (001),0.08105529219504684,0.5614035087719298,0.017543859649122806,True,True,True,True
Qwen2.5 Instruct Turbo (72B),GPT-4 (0613),0.21831876865756342,0.49122807017543857,0.10526315789473684,False,True,True,True
Qwen2.5 Instruct Turbo (72B),Qwen2 Instruct (72B),0.3846213348024837,0.5789473684210527,0.21052631578947367,False,True,False,True
Qwen2.5 Instruct Turbo (72B),Amazon Nova Pro,0.46066666908947335,0.5964912280701754,0.3157894736842105,False,True,False,True
Qwen2.5 Instruct Turbo (72B),Palmyra-X-004,0.5200622098025408,0.6491228070175439,0.3157894736842105,False,False,False,False
Qwen2.5 Instruct Turbo (72B),GPT-4 Turbo (2024-04-09),0.48900343980203353,0.5789473684210527,0.2982456140350877,False,True,False,True
Qwen2.5 Instruct Turbo (72B),Gemini 1.5 Pro (0409 preview),0.3198981766367693,0.6491228070175439,0.2982456140350877,False,False,False,False
Qwen2.5 Instruct Turbo (72B),Llama 3.2 Vision Instruct Turbo (90B),0.5817857616247647,0.7192982456140351,0.43859649122807015,False,False,False,False
Qwen2.5 Instruct Turbo (72B),Llama 3.1 Instruct Turbo (70B),0.6201900683077997,0.7543859649122807,0.47368421052631576,False,False,False,False
Qwen2.5 Instruct Turbo (72B),Mistral Large 2 (2407),0.6011461276379191,0.7543859649122807,0.47368421052631576,False,False,False,False
Qwen2.5 Instruct Turbo (72B),Gemini 2.0 Flash (Experimental),0.5003421022677684,0.6491228070175439,0.2631578947368421,False,False,False,False
Gemini 1.5 Pro (001),GPT-4 (0613),0.027922520291485734,0.47368421052631576,0.017543859649122806,True,True,True,True
Gemini 1.5 Pro (001),Qwen2 Instruct (72B),0.03927222818587056,0.49122807017543857,0.017543859649122806,True,True,True,True
Gemini 1.5 Pro (001),Amazon Nova Pro,0.0863869041250263,0.5263157894736842,0.05263157894736842,True,True,True,True
Gemini 1.5 Pro (001),Palmyra-X-004,0.1549054411417858,0.6666666666666666,0.10526315789473684,True,False,True,True
Gemini 1.5 Pro (001),GPT-4 Turbo (2024-04-09),0.1412195889238036,0.47368421052631576,0.07017543859649122,True,True,True,True
Gemini 1.5 Pro (001),Gemini 1.5 Pro (0409 preview),0.40787238909788476,0.5614035087719298,0.2631578947368421,False,True,False,True
Gemini 1.5 Pro (001),Llama 3.2 Vision Instruct Turbo (90B),0.22908255765680355,0.5789473684210527,0.12280701754385964,False,True,True,True
Gemini 1.5 Pro (001),Llama 3.1 Instruct Turbo (70B),0.25094152353839977,0.5614035087719298,0.14035087719298245,False,True,True,True
Gemini 1.5 Pro (001),Mistral Large 2 (2407),0.25466401953342443,0.6666666666666666,0.15789473684210525,False,False,True,True
Gemini 1.5 Pro (001),Gemini 2.0 Flash (Experimental),0.27676366675210834,0.5614035087719298,0.17543859649122806,False,True,True,True
GPT-4 (0613),Qwen2 Instruct (72B),0.0235534045349971,0.5789473684210527,0.017543859649122806,True,True,True,True
GPT-4 (0613),Amazon Nova Pro,0.08948011775304762,0.5789473684210527,0.03508771929824561,True,True,True,True
GPT-4 (0613),Palmyra-X-004,0.27939840751784023,0.6666666666666666,0.19298245614035087,False,False,True,True
GPT-4 (0613),GPT-4 Turbo (2024-04-09),0.37581584960252884,0.6140350877192983,0.21052631578947367,False,False,False,False
GPT-4 (0613),Gemini 1.5 Pro (0409 preview),0.17367092760022002,0.631578947368421,0.07017543859649122,True,False,True,True
GPT-4 (0613),Llama 3.2 Vision Instruct Turbo (90B),0.6019818130862075,0.7368421052631579,0.43859649122807015,False,False,False,False
GPT-4 (0613),Llama 3.1 Instruct Turbo (70B),0.6648261758461657,0.7719298245614035,0.45614035087719296,False,False,False,False
GPT-4 (0613),Mistral Large 2 (2407),0.5761876522119874,0.7719298245614035,0.42105263157894735,False,False,False,False
GPT-4 (0613),Gemini 2.0 Flash (Experimental),0.3388855870824991,0.5789473684210527,0.15789473684210525,False,True,True,True
Qwen2 Instruct (72B),Amazon Nova Pro,0.10670014063401646,0.49122807017543857,0.05263157894736842,True,True,True,True
Qwen2 Instruct (72B),Palmyra-X-004,0.2906676073245,0.5789473684210527,0.15789473684210525,False,True,True,True
Qwen2 Instruct (72B),GPT-4 Turbo (2024-04-09),0.279386617112564,0.47368421052631576,0.12280701754385964,False,True,True,True
Qwen2 Instruct (72B),Gemini 1.5 Pro (0409 preview),0.17519364128119588,0.5789473684210527,0.08771929824561403,True,True,True,True
Qwen2 Instruct (72B),Llama 3.2 Vision Instruct Turbo (90B),0.4531742017293826,0.6140350877192983,0.2631578947368421,False,False,False,False
Qwen2 Instruct (72B),Llama 3.1 Instruct Turbo (70B),0.5094420082142046,0.5964912280701754,0.3333333333333333,False,True,False,True
Qwen2 Instruct (72B),Mistral Large 2 (2407),0.46579914633589226,0.7017543859649122,0.2982456140350877,False,False,False,False
Qwen2 Instruct (72B),Gemini 2.0 Flash (Experimental),0.3580998704649268,0.5789473684210527,0.17543859649122806,False,True,True,True
Amazon Nova Pro,Palmyra-X-004,0.1890576207371278,0.5614035087719298,0.08771929824561403,True,True,True,True
Amazon Nova Pro,GPT-4 Turbo (2024-04-09),0.1442670092685655,0.45614035087719296,0.07017543859649122,True,True,True,True
Amazon Nova Pro,Gemini 1.5 Pro (0409 preview),0.13623203545031992,0.5789473684210527,0.05263157894736842,True,True,True,True
Amazon Nova Pro,Llama 3.2 Vision Instruct Turbo (90B),0.31456311219380967,0.5789473684210527,0.15789473684210525,False,True,True,True
Amazon Nova Pro,Llama 3.1 Instruct Turbo (70B),0.3481148864809016,0.5964912280701754,0.17543859649122806,False,True,True,True
Amazon Nova Pro,Mistral Large 2 (2407),0.365891256432931,0.6140350877192983,0.21052631578947367,False,False,False,False
Amazon Nova Pro,Gemini 2.0 Flash (Experimental),0.2869762813735117,0.5263157894736842,0.14035087719298245,False,True,True,True
Palmyra-X-004,GPT-4 Turbo (2024-04-09),0.002104922124831312,0.3333333333333333,0.017543859649122806,True,True,True,True
Palmyra-X-004,Gemini 1.5 Pro (0409 preview),0.03976319390100774,0.3508771929824561,0.017543859649122806,True,True,True,True
Palmyra-X-004,Llama 3.2 Vision Instruct Turbo (90B),0.18363387880795418,0.42105263157894735,0.07017543859649122,True,True,True,True
Palmyra-X-004,Llama 3.1 Instruct Turbo (70B),0.22166354404494243,0.45614035087719296,0.10526315789473684,False,True,True,True
Palmyra-X-004,Mistral Large 2 (2407),0.3205890654215396,0.49122807017543857,0.15789473684210525,False,True,True,True
Palmyra-X-004,Gemini 2.0 Flash (Experimental),0.2118962841524096,0.47368421052631576,0.08771929824561403,False,True,True,True
GPT-4 Turbo (2024-04-09),Gemini 1.5 Pro (0409 preview),0.036149792366523624,0.5964912280701754,0.017543859649122806,True,True,True,True
GPT-4 Turbo (2024-04-09),Llama 3.2 Vision Instruct Turbo (90B),0.33617484955379484,0.5964912280701754,0.21052631578947367,False,True,False,True
GPT-4 Turbo (2024-04-09),Llama 3.1 Instruct Turbo (70B),0.36259944982766,0.6491228070175439,0.22807017543859648,False,False,False,False
GPT-4 Turbo (2024-04-09),Mistral Large 2 (2407),0.28017834620758864,0.6842105263157895,0.15789473684210525,False,False,True,True
GPT-4 Turbo (2024-04-09),Gemini 2.0 Flash (Experimental),0.17315959609621814,0.49122807017543857,0.08771929824561403,True,True,True,True
Gemini 1.5 Pro (0409 preview),Llama 3.2 Vision Instruct Turbo (90B),0.0750896350345385,0.47368421052631576,0.03508771929824561,True,True,True,True
Gemini 1.5 Pro (0409 preview),Llama 3.1 Instruct Turbo (70B),0.09572492025370315,0.5263157894736842,0.05263157894736842,True,True,True,True
Gemini 1.5 Pro (0409 preview),Mistral Large 2 (2407),0.09881319654348933,0.5614035087719298,0.05263157894736842,True,True,True,True
Gemini 1.5 Pro (0409 preview),Gemini 2.0 Flash (Experimental),0.11563303338218428,0.49122807017543857,0.07017543859649122,True,True,True,True
Llama 3.2 Vision Instruct Turbo (90B),Llama 3.1 Instruct Turbo (70B),0.15160213948401616,0.5087719298245614,0.08771929824561403,True,True,True,True
Llama 3.2 Vision Instruct Turbo (90B),Mistral Large 2 (2407),0.058981702157504735,0.5614035087719298,0.03508771929824561,True,True,True,True
Llama 3.2 Vision Instruct Turbo (90B),Gemini 2.0 Flash (Experimental),0.05416686376334773,0.45614035087719296,0.03508771929824561,True,True,True,True
Llama 3.1 Instruct Turbo (70B),Mistral Large 2 (2407),0.02490410566712835,0.543859649122807,0.017543859649122806,True,True,True,True
Llama 3.1 Instruct Turbo (70B),Gemini 2.0 Flash (Experimental),0.03855147266809162,0.43859649122807015,0.017543859649122806,True,True,True,True
Mistral Large 2 (2407),Gemini 2.0 Flash (Experimental),0.028191735737057664,0.42105263157894735,0.017543859649122806,True,True,True,True
