﻿model,MMLU,GPQA,MATH,HumanEval,DROP
o1-mini,85.2,60,90,92.4,83.9
gpt-4o,87.2,49.9,76.6,91,83.7
gpt-4o-mini,82,40.2,70.2,87.2,79.7
gpt-4-32k,86.7,49.3,73.4,88.2,86
claude-3-5-sonnet,88.3,59.4,71.1,92,87.1
claude-3-opus,86.8,50.4,60.1,84.9,83.1
phi-v4,84.8,56.1,80.4,82.6,75.5
qwen-v2.5-14b-instruct,79.9,42.9,75.6,72.1,85.5
llama3-8b-instruct,68.4,34.2,30,62.2,58.4
llama3-70b-instruct,82,39.5,50.4,81.7,79.7
