Model,Model Family,Model Size (B),Pretraining Data Size (T),FLOPs (1E21),Arena-Elo,MTBench,MMLU,ARC-C,HellaSwag,Winogrande,TruthfulQA,GSM8K,HumanEval,ABench-OA,ABench-Code_OS,ABench-Code_DB,ABench-Code_KG,ABench-Game_DCG,ABench-Game_LTP,ABench-Game_HH,ABench-Web_WS,ABench-Web_WB,ABoard-Embodied AI_ALF_PR,ABoard-Embodied AI_ALF_SR,ABoard-Embodied AI_SW_PR,ABoard-Embodied AI_SW_SR,ABoard-Embodied AI_BA_PR,ABoard-Embodied AI_BA_SR,ABoard-Game_JC_PR,ABoard-Game_JC_SR,ABoard-Game_PL_PR,ABoard-Game_PL_SR,ABoard-Web_WS_PR,ABoard-Web_WS_SR,ABoard-Web_WA_PR,ABoard-Web_WA_SR,ABoard-Tool_TQ_PR,ABoard-Tool_TQ_SR,ABoard-Tool_TO_PR,ABoard-Tool_TO_SR,ABoard-Avg_PR,ABoard-Avg_SR
gpt-4-0613,GPT-4,,,,0.9239849037815429,0.918,0.864,0.963,0.953,0.875,0.59,0.92,0.8719512195121951,4.01,42.4,32.0,58.8,74.5,16.6,78.0,61.1,29.0,,,,,,,,,,,,,,,,,,,,
claude-2.0,Claude-2,,,,0.8440020064643946,0.806,0.785,0.91,,,0.69,0.88,0.6707317073170732,2.49,18.1,27.3,41.3,55.5,8.4,54.0,61.4,0.0,34.1,24.6,32.0,11.1,48.1,37.5,20.4,0.0,61.4,40.0,74.6,37.8,36.4,8.6,73.5,48.3,59.6,27.5,48.9,26.2
claude-1.3,Claude-1,,,,0.8904131199472344,0.79,0.77,0.9,,,0.62,0.852,0.6158536585365854,2.44,9.7,22.0,38.9,40.9,8.2,58.0,55.7,25.0,,,,,,,,,,,,,,,,,,,,
gpt-3.5-turbo-0613,GPT-3.5-Turbo,,,,0.8052825755815128,0.8390000000000001,0.7,0.852,0.855,0.816,0.47,0.571,0.774390243902439,2.32,32.6,36.7,25.9,33.7,10.5,16.0,64.1,20.0,35.6,17.2,31.9,18.9,51.7,39.3,19.9,5.0,25.0,5.0,76.4,35.1,25.5,4.6,69.4,45.0,37.2,7.5,41.4,19.7
claude-instant-1.1,Claude-Instant,,,,0.7817296940054941,0.7849999999999999,0.734,0.857,,,0.66,0.809,0.5914634146341463,1.6,16.7,18.0,20.8,5.9,12.6,30.0,49.7,4.0,,,,,,,,,,,,,,,,,,,,
codellama-34b-instruct,Codellama-Instruct,34.0,2.52,514.08,0.6017394832861076,,0.5462171392568606,0.5426621160409556,0.7691694881497709,0.745067087608524,0.4443753863305565,0.2304776345716452,0.4878048780487805,0.96,2.8,14.0,23.5,8.4,0.7,4.0,52.1,20.0,11.3,3.0,3.5,0.0,19.9,13.4,15.5,0.0,18.5,3.3,71.7,23.5,21.2,4.1,60.0,13.3,48.8,7.5,30.0,7.6
vicuna-13b-v1.5,Vicuna,13.0,2.0,156.0,0.5933354456961571,0.657,0.5623896507251537,0.5656996587030717,0.8108942441744672,0.7466456195737964,0.5107449529759277,0.112964366944655,0.2134146341463414,0.93,10.4,6.7,9.4,0.1,8.0,8.0,41.7,12.0,,,,,,,,,,,,,,,,,,,,
llama-2-70b-chat,Llama-2-Chat,70.0,2.0,840.0,0.706849517866438,0.686,0.6345350976408146,0.6459044368600683,0.8587930691097391,0.8050513022888713,0.5280473232260097,0.266868840030326,0.3170731707317073,0.78,9.7,13.0,8.0,21.3,0.0,2.0,5.6,19.0,13.2,3.0,2.6,0.0,30.0,19.6,7.8,0.0,8.1,1.7,53.6,13.1,11.6,3.3,48.3,0.0,38.6,0.0,23.8,4.5
llama-2-13b-chat,Llama-2-Chat,13.0,2.0,156.0,0.5973947873962169,0.665,0.5411813543925467,0.590443686006826,0.8193586934873531,0.745067087608524,0.4411794590119937,0.1523881728582259,0.1829268292682926,0.77,4.2,11.7,3.6,26.4,0.0,6.0,25.3,13.0,7.8,0.0,1.1,0.0,18.1,6.2,3.2,0.0,4.1,0.0,63.5,10.8,7.9,2.0,35.1,0.0,29.3,0.0,18.9,2.1
vicuna-33b-v1.3,Vicuna,33.0,2.0,396.0,0.7379703820300949,0.712,0.5920611158431873,0.6160409556313993,0.8306114319856602,0.7703235990528808,0.5609196687506864,0.1372251705837756,0.2134146341463414,0.73,15.3,11.0,1.2,16.3,1.0,6.0,23.9,7.0,,,,,,,,,,,,,,,,,,,,
openchat-13b-v3.2,OpenChat,13.0,2.0,156.0,,,0.5668268469601176,0.5964163822525598,0.8268273252340171,0.7695343330702447,0.4448949611779454,0.136467020470053,0.2073170731707317,0.7,15.3,12.3,5.5,0.1,0.0,0.0,46.9,15.0,,,,,,,,,,,,,,,,,,,,
wizardlm-13b-v1.2,WizardLM,13.0,2.0,156.0,0.6416716945110327,0.72,0.5366855761436067,0.590443686006826,0.8221469826727743,0.7190213101815311,0.4726776331987168,0.134950720242608,0.3902439024390244,0.66,9.0,12.7,1.7,1.9,0.0,10.0,43.7,12.0,,,,,,,,,,,,,,,,,,,,
codellama-13b-instruct,Codellama-Instruct,13.0,2.52,196.56,,,0.3888705360076342,0.4453924914675768,0.6492730531766581,0.6803472770323599,0.4587866352956375,0.1266110689916603,0.4451219512195122,0.56,3.5,9.7,10.4,0.0,0.0,0.0,43.8,14.0,13.4,2.2,9.6,2.2,22.2,17.0,0.0,0.0,9.3,1.7,65.5,25.9,17.7,3.7,52.5,25.0,41.8,12.5,25.8,10.0
vicuna-7b-v1.5,Vicuna,7.0,2.0,84.0,0.4968563576457123,0.617,0.503115717432969,0.5324232081911263,0.7739494124676359,0.7213891081294396,0.5032970216179807,0.0818802122820318,0.1341463414634146,0.56,9.7,8.7,2.5,0.3,6.4,0.0,2.2,9.0,,,,,,,,,,,,,,,,,,,,
guanaco-65b,Guanaco,65.0,1.4,546.0,,0.641,0.6250923728629318,0.6544368600682594,0.8646683927504482,0.823993685872139,0.5281436462656385,0.2600454890068233,0.274390243902439,0.54,8.3,14.7,1.9,0.1,1.5,12.0,0.9,10.0,,,,,,,,,,,,,,,,,,,,
codellama-7b-instruct,Codellama-Instruct,7.0,2.52,105.84,,,0.3454383560164048,0.3651877133105802,0.554371639115714,0.6456195737963694,0.4124873434135902,0.0796057619408642,0.3963414634146341,0.5,4.9,12.7,8.2,0.0,0.0,2.0,25.2,12.0,,,,,,,,,,,,,,,,,,,,
wizardlm-30b-v1.0,WizardLM,30.0,3.0,540.0,,0.701,0.5887559041088344,0.6254266211604096,0.8327026488747261,0.7750591949486977,0.5248739406679576,0.2183472327520849,,0.46,13.9,12.7,2.9,0.3,1.8,6.0,4.4,1.0,,,,,,,,,,,,,,,,,,,,
guanaco-33b,Guanaco,33.0,1.4,277.2,0.5703230253618208,0.653,0.5569047350729348,0.6245733788395904,0.8447520414260108,,0.5121992740888713,,0.2621951219512195,0.39,11.1,9.3,3.2,0.3,0.0,6.0,6.2,5.0,,,,,,,,,,,,,,,,,,,,
koala-13b,Koala,13.0,1.0,78.0,0.389950060715602,0.5349999999999999,0.4501103446384098,0.5298634812286689,0.7759410476000796,0.7403314917127072,0.5022661446867869,0.0682335102350265,0.1219512195121951,0.34,3.5,5.0,0.4,0.1,4.4,0.0,3.9,7.0,,,,,,,,,,,,,,,,,,,,
llama-2-7b-chat,Llama-2-Chat,7.0,2.0,84.0,0.549140618077077,0.627,0.4705941276505219,0.5290102389078498,0.7855008962358097,0.7174427782162589,0.4557037019510113,0.0735405610310841,0.1219512195121951,0.34,4.2,8.0,2.1,6.9,0.0,0.0,11.6,7.0,,,,,,,,,,,,,,,,,,,,
dolly-v2-12b,Dolly-v2,12.0,0.3,21.6,0.0,0.3279999999999999,0.258084228257613,0.4240614334470989,0.7252539334793866,0.6085240726124704,0.3382708132669227,0.0121304018195602,0.0,0.14,0.0,0.0,0.0,0.1,1.2,0.0,0.4,9.0,,,,,,,,,,,,,,,,,,,,
oasst-sft-4-pythia-12b-epoch-3.5,Oasst-SFT,12.0,0.3,21.6,,,0.2681816525296633,0.4573378839590443,0.6859191396136228,0.659037095501184,0.3780771940672697,0.0303260045489006,0.0792682926829268,0.03,1.4,0.0,0.0,0.0,0.0,0.0,0.3,1.0,,,,,,,,,,,,,,,,,,,,
gpt-4-0314,GPT-4,,,,1.0,0.8960000000000001,0.864,0.963,0.953,0.875,0.59,0.92,0.902439024390244,,,,,,,,,,65.5,43.3,78.8,52.2,70.7,56.2,52.4,35.0,81.2,61.7,76.5,39.0,39.4,15.1,85.1,68.3,80.8,60.0,70.0,47.9
deepseek-llm-67b-chat,Deepseek-LLM-Chat,67.0,2.0,804.0,0.7061227564095748,,0.7174372200611983,0.6774744027303754,0.8679545907189803,0.8421468034727704,0.5583209009287327,0.623199393479909,0.7012195121951219,,,,,,,,,,34.5,20.9,36.1,10.0,31.7,22.3,13.7,0.0,22.0,6.7,72.7,31.9,23.9,5.7,71.4,40.0,40.5,17.5,38.5,17.2
lemur-70b-chat-v1,Lemur-Chat,70.0,2.09,877.8,,,0.6599154521327986,0.6697952218430034,0.8572993427604063,0.8168902920284136,0.5657669903989726,0.3532979529946929,0.5914634146341463,,,,,,,,,,10.8,0.7,33.4,5.6,19.4,9.8,10.1,0.0,9.7,3.3,71.8,11.6,12.2,3.3,72.0,28.3,37.7,12.5,30.8,8.3
mistral-7b-instruct-v0.1,Mistral-Instruct,7.0,,,0.5009779825696534,0.6839999999999999,0.5538773717731529,0.5452218430034129,0.7563234415455089,0.7371744277821626,0.5628382292113293,0.1425322213798332,0.3536585365853658,,,,,,,,,,9.8,0.0,15.8,2.2,20.1,14.3,11.0,0.0,4.7,0.0,68.2,13.9,13.2,1.3,51.0,3.3,27.2,0.0,24.6,3.9
vicuna-13b-16k,Vicuna,13.0,2.0,156.0,,0.692,0.5488751813586674,0.5674061433447098,0.8037243576976698,0.728492501973165,0.5196096489849312,0.133434420015163,0.25,,,,,,,,,,11.0,1.5,14.1,2.2,14.3,5.4,15.2,0.0,7.2,1.7,73.3,21.9,11.3,2.9,34.3,3.3,26.9,0.0,23.1,4.3
