Model,Model Family,Model Size (B),Pretraining Data Size (T),FLOPs (1E21),Arena-Elo,MTBench,MMLU,ARC-C,HellaSwag,Winogrande,TruthfulQA,GSM8K,HumanEval
gpt-4-0613,GPT-4,,,,0.9239849037815429,0.9179999999999999,0.864,0.963,0.953,0.875,0.59,0.92,0.8719512195121951
claude-2.0,Claude-2,,,,0.8440020064643946,0.806,0.785,0.91,,,0.69,0.88,0.6707317073170732
claude-1.3,Claude-1,,,,0.8904131199472344,0.79,0.77,0.9,,,0.62,0.852,0.6158536585365854
gpt-3.5-turbo-0613,GPT-3.5-Turbo,,,,0.8052825755815128,0.8390000000000001,0.7,0.852,0.855,0.816,0.47,0.571,0.774390243902439
claude-instant-1.1,Claude-Instant,,,,0.7817296940054941,0.7849999999999999,0.734,0.857,,,0.66,0.809,0.5914634146341463
codellama-34b-instruct,Codellama-Instruct,34.0,2.52,514.08,0.6017394832861076,,0.5462171392568606,0.5426621160409556,0.7691694881497709,0.745067087608524,0.4443753863305565,0.2304776345716452,0.4878048780487805
vicuna-13b-v1.5,Vicuna,13.0,2.0,156.0,0.5933354456961571,0.657,0.5623896507251537,0.5656996587030717,0.8108942441744672,0.7466456195737964,0.5107449529759277,0.112964366944655,0.2134146341463414
llama-2-70b-chat,Llama-2-Chat,70.0,2.0,840.0,0.706849517866438,0.686,0.6345350976408146,0.6459044368600683,0.8587930691097391,0.8050513022888713,0.5280473232260097,0.266868840030326,0.3170731707317073
llama-2-13b-chat,Llama-2-Chat,13.0,2.0,156.0,0.5973947873962169,0.665,0.5411813543925467,0.590443686006826,0.8193586934873531,0.745067087608524,0.4411794590119937,0.1523881728582259,0.1829268292682926
vicuna-33b-v1.3,Vicuna,33.0,2.0,396.0,0.7379703820300949,0.712,0.5920611158431873,0.6160409556313993,0.8306114319856602,0.7703235990528808,0.5609196687506864,0.1372251705837756,0.2134146341463414
openchat-13b-v3.2,OpenChat,13.0,2.0,156.0,,,0.5668268469601176,0.5964163822525598,0.8268273252340171,0.7695343330702447,0.4448949611779454,0.136467020470053,0.2073170731707317
wizardlm-13b-v1.2,WizardLM,13.0,2.0,156.0,0.6416716945110327,0.72,0.5366855761436067,0.590443686006826,0.8221469826727743,0.7190213101815311,0.4726776331987168,0.134950720242608,0.3902439024390244
codellama-13b-instruct,Codellama-Instruct,13.0,2.52,196.56,,,0.3888705360076342,0.4453924914675768,0.6492730531766581,0.6803472770323599,0.4587866352956375,0.1266110689916603,0.4451219512195122
vicuna-7b-v1.5,Vicuna,7.0,2.0,84.0,0.49685635764571234,0.617,0.503115717432969,0.5324232081911263,0.7739494124676359,0.7213891081294396,0.5032970216179807,0.0818802122820318,0.1341463414634146
guanaco-65b,Guanaco,65.0,1.4,546.0,,0.641,0.6250923728629318,0.6544368600682594,0.8646683927504482,0.823993685872139,0.5281436462656385,0.2600454890068233,0.274390243902439
codellama-7b-instruct,Codellama-Instruct,7.0,2.52,105.84,,,0.3454383560164048,0.3651877133105802,0.554371639115714,0.6456195737963694,0.4124873434135902,0.0796057619408642,0.3963414634146341
wizardlm-30b-v1.0,WizardLM,30.0,3.0,540.0,,0.701,0.5887559041088344,0.6254266211604096,0.8327026488747261,0.7750591949486977,0.5248739406679576,0.2183472327520849,
guanaco-33b,Guanaco,33.0,1.4,277.2,0.5703230253618208,0.653,0.5569047350729348,0.6245733788395904,0.8447520414260108,,0.5121992740888713,,0.2621951219512195
koala-13b,Koala,13.0,1.0,78.0,0.38995006071560206,0.5349999999999999,0.4501103446384098,0.5298634812286689,0.7759410476000796,0.7403314917127072,0.5022661446867869,0.0682335102350265,0.1219512195121951
llama-2-7b-chat,Llama-2-Chat,7.0,2.0,84.0,0.549140618077077,0.627,0.4705941276505219,0.5290102389078498,0.7855008962358097,0.7174427782162589,0.4557037019510113,0.0735405610310841,0.1219512195121951
dolly-v2-12b,Dolly-v2,12.0,0.3,21.6,0.0,0.32799999999999996,0.258084228257613,0.4240614334470989,0.7252539334793866,0.6085240726124704,0.3382708132669227,0.0121304018195602,0.0
oasst-sft-4-pythia-12b-epoch-3.5,Oasst-SFT,12.0,0.3,21.6,,,0.2681816525296633,0.4573378839590443,0.6859191396136228,0.659037095501184,0.3780771940672697,0.0303260045489006,0.0792682926829268
gpt-4-0314,GPT-4,,,,1.0,0.8960000000000001,0.864,0.963,0.953,0.875,0.59,0.92,0.902439024390244
deepseek-llm-67b-chat,Deepseek-LLM-Chat,67.0,2.0,804.0,0.7061227564095748,,0.7174372200611983,0.6774744027303754,0.8679545907189803,0.8421468034727704,0.5583209009287327,0.623199393479909,0.7012195121951219
lemur-70b-chat-v1,Lemur-Chat,70.0,2.09,877.8,,,0.6599154521327986,0.6697952218430034,0.8572993427604063,0.8168902920284136,0.5657669903989726,0.3532979529946929,0.5914634146341463
mistral-7b-instruct-v0.1,Mistral-Instruct,7.0,,,0.5009779825696534,0.6839999999999999,0.5538773717731529,0.5452218430034129,0.7563234415455089,0.7371744277821626,0.5628382292113293,0.1425322213798332,0.3536585365853658
vicuna-13b-16k,Vicuna,13.0,2.0,156.0,,0.692,0.5488751813586674,0.5674061433447098,0.8037243576976698,0.728492501973165,0.5196096489849312,0.133434420015163,0.25
