model_name,thinking_mode,model_family,instruction_tuned,source,AIME'24,AIME'25,ARC-C,AlignBench_v1.1,Arena-Hard,AutoLogi,BBH,BFCL_v3,C-Eval,CRUX-O,CodeForces_Percentile,CodeForces_Rating,Creative_Writing_v3,EvalPlus,GPQA,GPQA-Diamond,GSM8K,HellaSwag,HumanEval,IFEval_strict_prompt,INCLUDE,LiveBench_0831,LiveBench_2024-11-25,LiveCodeBench,LiveCodeBench_v5,MATH,MATH-500,MBPP,MGSM,MLogiQA,MMLU,MMLU-Pro,MMLU-Redux,MMMLU,MMMLU_14_langs,MT-AIME2024,MTbench,Multi-IF,MultiPL-E,PolyMath,RULER_Avg,SuperGPQA,TruthfulQA,Winogrande,WritingBench,ZebraLogic
DeepSeek/DeepSeek-R1-Distill-Llama-70B,Thinking,DeepSeek,Yes,Qwen3 Report Table 13,,,,,,,,,71.8,,,,,,,65.2,,,,,,,54.5,,,,,,,,,,89.3,,,,,,,,,,,,,
Qwen/QwQ-32B,Thinking,Qwen,Yes,"Qwen3 Report Table 15 (Thinking, Qwen3-30B-A3B / Qwen3-14B / QwQ-32B)",,,,,,,,,88.4,,,,,,,65.6,,,,,,,72.0,,,,,,,,,,90.0,,,,,,,,,,,,,
OpenAI/o3-mini,Thinking,OpenAI,Yes,Qwen3 Report Table 13,,,,,,,,,75.1,,,,,,,76.8,,,,,,,70.0,,,,,,,,,,90.0,,,,,,,,,,,,,
Qwen/Qwen3-32B,Thinking,Qwen,Varies,Qwen3 Report Table 23 (RULER),31.0,20.2,,8.58,92.8,78.5,,63.0,83.3,,71.0,1353.0,78.3,,,54.6,,,,83.2,70.9,,59.8,,31.3,,88.6,,,62.9,,,85.7,,76.5,24.1,,70.7,,22.5,91.0,,,,7.54,29.2
OpenAI/GPT-4o-mini-2024-07-18,Non-thinking,OpenAI,Yes,Qwen3 Report Table 14,8.1,8.8,,7.81,74.9,52.6,,64.0,66.3,,52.6,1113.0,70.3,,,40.2,,,,80.4,66.0,,41.3,,27.9,,78.2,,,42.6,,,81.5,,72.1,6.0,,62.4,,12.0,,,,,5.98,20.1
Llama/LLaMA-4-Scout,Non-thinking,Llama,Unknown,Qwen3 Report Table 14,28.6,10.0,,7.49,70.5,56.8,,45.4,78.2,,43.7,981.0,55.0,,,57.2,,,,84.7,74.1,,47.6,,29.8,,82.6,,,53.9,,,86.3,,77.5,19.1,,64.2,,20.9,,,,,5.49,24.2
Qwen/Qwen2.5-72B-Instruct,Non-thinking,Qwen,Yes,Qwen3 Report Table 12 (Non-thinking),18.9,15.0,,7.89,81.2,66.1,,63.4,84.7,,35.0,859,61.8,,,49.0,,,,86.1,69.6,,51.4,,30.7,,83.6,,,59.2,,,86.8,,74.3,12.7,,65.3,,16.7,95.1,,,,7.06,26.6
Qwen/Qwen2.5-7B-Instruct,Non-thinking,Qwen,Yes,"Qwen3 Report Table 18 (Non-thinking, Qwen3-8B / Qwen3-4B / LLaMA-3.1-8B)",,,,,,,,,78.0,,,,,,,36.4,,,,,,,58.9,,,,,,,,,,75.4,,,,,,,,85.4,,,,,
Qwen/Qwen2.5-14B-Instruct,Non-thinking,Qwen,Yes,"Qwen3 Report Table 18 (Non-thinking, Qwen3-8B / Qwen3-4B / LLaMA-3.1-8B)",,,,,,,,,72.2,,,,,,,45.5,,,,,,,60.7,,,,,,,,,,80.0,,,,,,,,91.4,,,,,
Qwen/Qwen2.5-32B-Instruct,Non-thinking,Qwen,Yes,"Qwen3 Report Table 16 (Non-thinking, Qwen3-30B-A3B / Qwen3-14B / Gemma-3-27B)",,,,,,,,,86.0,,,,,,,49.5,,,,,,,50.0,,,,,,,,,,83.9,,,,,,,,92.9,,,,,
Qwen/Qwen3-4B,Non-thinking,Qwen,No,"Qwen3 Report Table 18 (Non-thinking, Qwen3-8B / Qwen3-4B / LLaMA-3.1-8B)",,,,,,,,,72.2,,,,,,,41.7,,,,,,,48.4,,,,,,,,,,77.3,,,,,,,,83.5,,,,,
Qwen/Qwen3-8B,Non-thinking,Qwen,No,"Qwen3 Report Table 18 (Non-thinking, Qwen3-8B / Qwen3-4B / LLaMA-3.1-8B)",,,,,,,,,77.9,,,,,,,39.3,,,,,,,53.5,,,,,,,,,,79.5,,,,,,,,84.4,,,,,
Qwen/Qwen3-14B,Non-thinking,Qwen,No,"Qwen3 Report Table 16 (Non-thinking, Qwen3-30B-A3B / Qwen3-14B / Gemma-3-27B)",,,,,,,,,81.0,,,,,,,54.8,,,,,,,59.6,,,,,,,,,,82.0,,,,,,,,89.1,,,,,
Qwen/Qwen3-30B-A3B,Non-thinking,Qwen,No,"Qwen3 Report Table 16 (Non-thinking, Qwen3-30B-A3B / Qwen3-14B / Gemma-3-27B)",,,,,,,,,82.9,,,,,,,54.8,,,,,,,59.4,,,,,,,,,,84.1,,,,,,,,86.6,,,,,
Qwen/Qwen3-235B-A22B,Non-thinking,Qwen,No,Qwen3 Report Table 12 (Non-thinking),40.1,24.7,,8.91,96.1,83.3,,68.0,86.1,,75.7,1387,80.4,,,62.9,,,,83.2,75.6,,62.5,,35.3,,91.2,,,67.6,,,89.2,,79.8,32.4,,70.2,,32.4,92.2,,,,7.7,37.7
Qwen/Qwen2.5-32B-Base,Non-thinking,Qwen,No,Qwen3 Report Table 4 (Base models @~32B),,,,,,,84.48,,,67.8,,,,66.25,47.97,,92.87,,,,64.35,,,,,57.7,,73.6,78.12,,83.32,55.1,81.97,82.4,,,,,58.3,,,33.55,,,,
Qwen/Qwen2.5-72B-Base,Non-thinking,Qwen,No,Qwen3 Report Table 4 (Base models @~32B),,,,,,,86.3,,,68.5,,,,65.93,45.88,,91.5,,,,69.05,,,,,62.12,,75.4,82.4,,86.06,58.07,83.91,83.49,,,,,58.7,,,36.2,,,,
Google/Gemma-3-27B-Base,Non-thinking,Gemma,No,Qwen3 Report Table 4 (Base models @~32B),,,,,,,79.95,,,60.4,,,,55.78,26.26,,81.2,,,,68.94,,,,,51.78,,68.4,73.74,,78.69,52.88,76.53,77.62,,,,,45.03,,,29.87,,,,
Llama/Llama-4-Scout-Base,Non-thinking,Llama,No,Qwen3 Report Table 4 (Base models @~32B),,,,,,,82.4,,,61.9,,,,59.9,40.4,,85.37,,,,68.09,,,,,51.66,,68.6,79.93,,78.27,56.13,71.09,82.05,,,,,47.38,,,26.51,,,,
Qwen/Qwen3-32B-Base,Non-thinking,Qwen,No,Qwen3 Report Table 4 (Base models @~32B),,,,,,,87.38,,,72.5,,,,72.05,49.49,,93.4,,,,67.67,,,,,61.62,,78.2,83.06,,83.61,65.54,83.41,83.83,,,,,67.06,,,39.78,,,,
Llama/Llama-3-8B-Base,Non-thinking,Llama,No,Qwen3 Report Table 6 (Base models @~8B),,,,,,,57.7,,,36.8,,,,44.13,25.8,,55.3,,,,44.94,,,,,20.5,,48.4,38.92,,66.6,35.56,61.59,59.65,,,,,31.45,,,20.54,,,,
Qwen/Qwen2.5-7B-Base,Non-thinking,Qwen,No,Qwen3 Report Table 7 (Base models @~4B),,,,,,,70.4,,,48.5,,,,62.18,36.36,,85.36,,,,53.98,,,,,49.8,,63.4,63.6,,74.16,45.0,71.06,71.34,,,,,50.73,,,26.34,,,,
Qwen/Qwen2.5-14B-Base,Non-thinking,Qwen,No,Qwen3 Report Table 6 (Base models @~8B),,,,,,,78.18,,,61.1,,,,60.7,32.83,,90.22,,,,60.26,,,,,55.64,,69.0,74.68,,79.66,51.16,76.64,78.34,,,,,54.79,,,30.68,,,,
Qwen/Qwen3-8B-Base,Non-thinking,Qwen,No,Qwen3 Report Table 6 (Base models @~8B),,,,,,,78.4,,,62.0,,,,67.65,44.44,,89.84,,,,59.4,,,,,60.8,,69.8,76.02,,76.89,56.73,72.17,75.72,,,,,58.75,,,31.64,,,,
Google/Gemma-3-4B-Base,Non-thinking,Gemma,No,Qwen3 Report Table 7 (Base models @~4B),,,,,,,51.7,,,34.0,,,,43.23,24.24,,43.97,,,,49.06,,,,,26.1,,46.4,33.11,,59.51,29.23,56.91,59.62,,,,,28.06,,,17.68,,,,
Qwen/Qwen2.5-3B-Base,Non-thinking,Qwen,No,Qwen3 Report Table 7 (Base models @~4B),,,,,,,56.3,,,36.5,,,,46.28,26.26,,79.08,,,,45.9,,,,,42.64,,54.6,47.53,,65.62,34.61,63.68,65.55,,,,,39.65,,,20.31,,,,
Qwen/Qwen3-4B-Base,Non-thinking,Qwen,No,Qwen3 Report Table 7 (Base models @~4B),,,,,,,72.59,,,55.0,,,,63.53,36.87,,87.79,,,,56.29,,,,,54.1,,67.0,67.74,,72.99,50.58,72.79,71.42,,,,,53.13,,,28.43,,,,
OpenAI/OpenAI-o1,Thinking,OpenAI,Unknown,Qwen3 Report Table 11 (235B-A22B Thinking),74.3,79.2,,8.86,92.1,79.8,,67.8,85.5,,96.7,1891,81.7,,,78.0,,,,92.6,84.6,,75.7,,63.9,,96.4,,,75.5,,,92.8,,88.4,67.4,,48.8,,38.9,,,,,7.69,81.0
DeepSeek/DeepSeek-R1,Thinking,DeepSeek,Unknown,Qwen3 Report Table 11 (Thinking),79.8,70.0,,8.76,92.3,86.1,,56.9,91.8,,98.1,2029,85.5,,,71.5,,,,83.3,82.7,,71.6,,64.3,,97.3,,,73.8,,,92.9,,86.4,73.5,,67.7,,47.1,,,,,7.71,78.7
xAI/Grok-3-Beta,Thinking,xAI,Unknown,Qwen3 Report Table 11 (235B-A22B Thinking),,77.3,,,,,,,,,,,,,,84.0,,,,,,,,,70.6,,83.9,,,,,,80.2,,,,,,,,,,,,,
Google/Gemini2.5-Pro,Thinking,Google,Unknown,Qwen3 Report Table 11 (235B-A22B Thinking),86.7,86.7,,9.03,96.4,85.4,,62.9,82.9,,97.9,2001,86.0,,,84.0,,,,89.5,85.1,,82.4,,70.4,,92.0,,,75.6,,,93.7,,86.9,76.9,,77.8,,52.2,,,,,8.09,87.4
OpenAI/GPT-4o-2024-11-20,Non-thinking,OpenAI,Yes,Qwen3 Report Table 12 (Non-thinking),11.1,7.6,,8.42,85.3,65.9,,72.5,75.5,,35.4,864,81.1,,,46.0,,,,86.5,78.8,,52.2,,32.7,,77.2,,,57.4,,,87.0,,80.3,9.2,,65.6,,12.0,,,,,7.11,27.4
DeepSeek/DeepSeek-V3,Non-thinking,DeepSeek,Unknown,Qwen3 Report Table 12 (Non-thinking),39.2,28.8,,8.64,85.5,76.1,,57.6,86.5,,54.1,1134,74.0,,,59.1,,,,86.1,76.7,,60.5,,33.1,,90.2,,,59.3,,,89.1,,81.2,10.2,,65.3,,29.2,,,,,6.49,42.1
Llama/Llama-4-Maverick,Non-thinking,Llama,Unknown,Qwen3 Report Table 12 (235B-A22B Non-thinking),38.5,15.9,,7.89,82.7,75.2,,52.9,83.5,,24.3,712,61.3,,,69.8,,,,86.7,80.9,,59.5,,37.2,,90.6,,,47.6,,,91.8,,82.5,27.0,,75.5,,50.7,,,,,5.46,40.0
OpenAI/o1,Thinking,OpenAI,Unknown,Qwen3 Report Table 11 (Thinking),74.3,79.2,,8.86,92.1,79.8,,67.8,85.5,,96.7,1891,81.7,,,78.0,,,,92.6,84.6,,75.7,,63.9,,96.4,,,75.5,,,92.8,,88.4,67.4,,48.8,,38.9,,,,,7.69,81.0
xAI/Grok-3-Beta,Thinking,xAI,Unknown,Qwen3 Report Table 11 (Thinking),77.3,77.3,,,,,,,,,,,,,,80.2,,,,,,,,,70.6,,83.9,,,,,,,,,,,,,,,,,,,
Google/Gemini-2.5-Pro,Thinking,Google,Unknown,Qwen3 Report Table 11 (Thinking),86.7,86.7,,9.03,96.4,85.4,,62.9,82.9,,97.9,2001,86.0,,,84.0,,,,89.5,85.1,,82.4,,70.4,,92.0,,,75.6,,,93.7,,86.9,76.9,,77.8,,52.2,,,,,8.09,86.7
Llama/LLaMA-4-Maverick,Non-thinking,Llama,Unknown,Qwen3 Report Table 12 (Non-thinking),38.5,15.9,,7.89,82.7,75.2,,52.9,83.5,,24.3,712,61.3,,,69.8,,,,86.7,69.0,,59.5,,17.2,,90.6,,,47.0,,,91.8,,82.5,27.0,,75.5,,27.0,,,,,5.46,40.0
DeepSeek/DeepSeek-R1-Distill-Qwen-32B,Thinking,DeepSeek,Yes,"Qwen3 Report Table 17 (Thinking, Qwen3-8B / Qwen3-4B)",,,,,,,,,82.2,,,,,,,62.1,,,,,,,45.6,,,,,,,,,,88.2,,,,,,,,,,,,,
Google/Gemma-3-27B-IT,Non-thinking,Gemma,Yes,"Qwen3 Report Table 16 (Non-thinking, Qwen3-30B-A3B / Qwen3-14B / Gemma-3-27B)",,,,,,,,,66.9,,,,,,,42.4,,,,,,,41.6,,,,,,,,,,82.6,,,,,,,,,,,,,
DeepSeek/DeepSeek-R1-Distill-Qwen-14B,Thinking,DeepSeek,Yes,"Qwen3 Report Table 17 (Thinking, Qwen3-8B / Qwen3-4B)",,,,,,,,,78.1,,,,,,,59.1,,,,,,,52.3,,,,,,,,,,84.1,,,,,,,,,,,,,
Llama/LLaMA-3.1-8B-Instruct,Non-thinking,Llama,Yes,"Qwen3 Report Table 18 (Non-thinking, Qwen3-8B / Qwen3-4B / LLaMA-3.1-8B)",,,,,,,,,52.0,,,,,,,32.8,,,,,,,26.0,,,,,,,,,,61.7,,,,,,,,,,,,,
Google/Gemma-3-12B-IT,Non-thinking,Gemma,Yes,"Qwen3 Report Table 18 (Non-thinking, Qwen3-8B / Qwen3-4B / LLaMA-3.1-8B)",,,,,,,,,61.1,,,,,,,40.9,,,,,,,43.7,,,,,,,,,,77.8,,,,,,,,,,,,,
DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B,Thinking,DeepSeek,Yes,"Qwen3 Report Table 19 (Thinking, Qwen3-1.7B / Qwen3-0.6B)",,,,,,,,,27.1,,,,,,,33.8,,,,,,,24.9,,,,,,,,,,45.4,,,,,,,,,,,,,
DeepSeek/DeepSeek-R1-Distill-LLaMA-8B,Thinking,DeepSeek,Yes,"Qwen3 Report Table 19 (Thinking, Qwen3-1.7B / Qwen3-0.6B)",,,,,,,,,50.4,,,,,,,49.0,,,,,,,40.6,,,,,,,,,,66.4,,,,,,,,,,,,,
Qwen/Qwen3-0.6B,Non-thinking,Qwen,No,"Qwen3 Report Table 20 (Non-thinking, Qwen3-1.7B / Qwen3-0.6B)",,,,,,,,,42.6,,,,,,,22.9,,,,,,,21.8,,,,,,,,,,44.6,,,,,,,,,,,,,
Qwen/Qwen3-1.7B,Non-thinking,Qwen,No,"Qwen3 Report Table 20 (Non-thinking, Qwen3-1.7B / Qwen3-0.6B)",,,,,,,,,61.0,,,,,,,28.6,,,,,,,35.6,,,,,,,,,,64.4,,,,,,,,,,,,,
Google/Gemma-3-1B-IT,Non-thinking,Gemma,Yes,"Qwen3 Report Table 20 (Non-thinking, Qwen3-1.7B / Qwen3-0.6B)",,,,,,,,,28.5,,,,,,,19.2,,,,,,,14.4,,,,,,,,,,33.3,,,,,,,,,,,,,
Microsoft/Phi-4-mini,Non-thinking,Phi,Yes,"Qwen3 Report Table 20 (Non-thinking, Qwen3-1.7B / Qwen3-0.6B)",,,,,,,,,40.0,,,,,,,25.2,,,,,,,25.3,,,,,,,,,,67.9,,,,,,,,,,,,,
Qwen/Qwen2.5-1.5B-Instruct,Non-thinking,Qwen,Yes,"Qwen3 Report Table 20 (Non-thinking, Qwen3-1.7B / Qwen3-0.6B)",,,,,,,,,53.3,,,,,,,29.8,,,,,,,18.0,,,,,,,,,,50.7,,,,,,,,,,,,,
Qwen/Qwen2.5-3B-Instruct,Non-thinking,Qwen,Yes,"Qwen3 Report Table 20 (Non-thinking, Qwen3-1.7B / Qwen3-0.6B)",,,,,,,,,68.2,,,,,,,30.3,,,,,,,23.8,,,,,,,,,,64.4,,,,,,,,,,,,,
Qwen/Qwen1.5-32B,Non-thinking,Qwen,No,Qwen3 Report Table 3 (Base models 14B-30B+),,,63.6,,,,66.8,,,,,,,,,,,85.0,,,,,,,,,,,,,74.3,44.1,69.0,,,,,,,,,,57.4,81.5,,
Google/Gemma-2-27B,Non-thinking,Gemma,No,Qwen3 Report Table 3 (Base models 14B-30B+),,,71.4,,,,74.9,,,,,,,,,,,86.4,,,,,,,,,,,,,75.2,49.1,74.1,,,,,,,,,,40.1,59.7,,
Yi/Yi-1.5-34B,Non-thinking,Yi,No,Qwen3 Report Table 3 (Base models 14B-30B+),,,65.6,,,,76.4,,,,,,,,,,,85.9,,,,,,,,,,,,,77.2,48.3,74.1,,,,,,,,,,53.9,84.9,,
Qwen/Qwen2.5-Turbo,Non-thinking,Qwen,Yes,Qwen3 Report Table 3 (Base models 14B-30B+),,,67.8,,,,76.1,,,,,,,,,,,85.0,,,,,,,,,,,,,79.5,55.6,77.1,,,,,,,,,,56.3,81.1,,
Qwen/Qwen2.5-14B,Non-thinking,Qwen,No,Qwen3 Report Table 3 (Base models 14B-30B+),,,67.3,,,,78.2,,,,,,,,,,,84.3,,,,,,,,,,,,,79.7,51.2,76.6,,,,,,,,,,58.4,81.0,,
Qwen/Qwen2.5-32B,Non-thinking,Qwen,No,Qwen3 Report Table 3 (Base models 14B-30B+),,,70.4,,,,84.5,,,,,,,,,,,85.2,,,,,,,,,,,,,83.3,55.1,82.0,,,,,,,,,,57.8,82.0,,
Qwen/Qwen2-0.5B,Non-thinking,Qwen,No,Qwen3 Report Table 5 (Base models smaller),,,31.0,,,,18.2,,,,,,,,,,,49.1,,,,,,,,,,,,,44.3,14.7,40.7,,,,,,,,,,39.7,56.9,,
Qwen/Qwen2.5-0.5B,Non-thinking,Qwen,No,Qwen3 Report Table 5 (Base models smaller),,,35.6,,,,20.3,,,,,,,,,,,52.1,,,,,,,,,,,,,47.5,15.7,45.1,,,,,,,,,,40.2,56.3,,
Qwen/Qwen2-1.5B,Non-thinking,Qwen,No,Qwen3 Report Table 5 (Base models smaller),,,43.7,,,,36.5,,,,,,,,,,,67.0,,,,,,,,,,,,,55.9,21.6,51.8,,,,,,,,,,45.9,65.0,,
Qwen/Qwen2.5-1.5B,Non-thinking,Qwen,No,Qwen3 Report Table 5 (Base models smaller),,,54.7,,,,45.1,,,,,,,,,,,67.9,,,,,,,,,,,,,60.9,28.5,58.5,,,,,,,,,,46.6,65.0,,
Google/Gemma-2-2.6B,Non-thinking,Gemma,No,Qwen3 Report Table 5 (Base models smaller),,,55.7,,,,41.9,,,,,,,,,,,74.6,,,,,,,,,,,,,52.2,23.0,50.9,,,,,,,,,,36.2,71.5,,
Qwen/Qwen2.5-3B,Non-thinking,Qwen,No,Qwen3 Report Table 5 (Base models smaller),,,56.5,,,,56.3,,,,,,,,,,,74.6,,,,,,,,,,,,,65.6,34.6,63.7,,,,,,,,,,48.9,71.1,,
Google/Gemma-2-9B,Non-thinking,Gemma,Yes,Qwen3 Report Table 8 (Base models ~7B instruct),,,,,41.6,,,,,,,,,,32.8,,76.7,,68.9,70.1,,30.6,,18.9,,44.3,,74.9,,,,52.1,72.8,,,,8.49,,53.4,,,,,,,
Llama/LLaMA-3.1-8B,Non-thinking,Llama,Yes,Qwen3 Report Table 8 (Base models ~7B instruct),,,,,27.8,,,,,,,,,,32.8,,84.5,,72.6,75.9,,26.7,,8.3,,51.9,,69.6,,,,48.3,67.2,,,,8.23,,50.7,,,,,,,
Qwen/Qwen2-7B,Non-thinking,Qwen,Yes,Qwen3 Report Table 8 (Base models ~7B instruct),,,,,25.0,,,,,,,,,,34.3,,85.7,,79.9,54.7,,29.2,,23.9,,52.9,,67.2,,,,44.1,67.3,,,,8.26,,59.1,,,,,,,
Qwen/Qwen2.5-7B,Non-thinking,Qwen,Yes,Qwen3 Report Table 8 (Base models ~7B instruct),,,,,52.0,,,,,,,,,,36.4,,91.6,,84.8,71.2,,35.9,,28.7,,75.5,,79.2,,,,56.3,75.4,,,,8.75,,70.4,,,,,,,
