Model,ABench-OA,ABench-Code_OS,ABench-Code_DB,ABench-Code_KG,ABench-Game_DCG,ABench-Game_LTP,ABench-Game_HH,ABench-Web_WS,ABench-Web_WB,ABoard-Embodied AI_ALF_PR,ABoard-Embodied AI_ALF_SR,ABoard-Embodied AI_SW_PR,ABoard-Embodied AI_SW_SR,ABoard-Embodied AI_BA_PR,ABoard-Embodied AI_BA_SR,ABoard-Game_JC_PR,ABoard-Game_JC_SR,ABoard-Game_PL_PR,ABoard-Game_PL_SR,ABoard-Web_WS_PR,ABoard-Web_WS_SR,ABoard-Web_WA_PR,ABoard-Web_WA_SR,ABoard-Tool_TQ_PR,ABoard-Tool_TQ_SR,ABoard-Tool_TO_PR,ABoard-Tool_TO_SR,ABoard-Avg_PR,ABoard-Avg_SR
gpt-4-0613,4.01,42.4,32.0,58.8,74.5,16.6,78.0,61.1,29.0,,,,,,,,,,,,,,,,,,,,
claude-2.0,2.49,18.1,27.3,41.3,55.5,8.4,54.0,61.4,0.0,34.1,24.6,32.0,11.1,48.1,37.5,20.4,0.0,61.4,40.0,74.6,37.8,36.4,8.6,73.5,48.3,59.6,27.5,48.9,26.2
claude-1.3,2.44,9.7,22.0,38.9,40.9,8.2,58.0,55.7,25.0,,,,,,,,,,,,,,,,,,,,
gpt-3.5-turbo-0613,2.32,32.6,36.7,25.9,33.7,10.5,16.0,64.1,20.0,35.6,17.2,31.9,18.9,51.7,39.3,19.9,5.0,25.0,5.0,76.4,35.1,25.5,4.6,69.4,45.0,37.2,7.5,41.4,19.7
claude-instant-1.1,1.6,16.7,18.0,20.8,5.9,12.6,30.0,49.7,4.0,,,,,,,,,,,,,,,,,,,,
codellama-34b-instruct,0.96,2.8,14.0,23.5,8.4,0.7,4.0,52.1,20.0,11.3,3.0,3.5,0.0,19.9,13.4,15.5,0.0,18.5,3.3,71.7,23.5,21.2,4.1,60.0,13.3,48.8,7.5,30.0,7.6
vicuna-13b-v1.5,0.93,10.4,6.7,9.4,0.1,8.0,8.0,41.7,12.0,,,,,,,,,,,,,,,,,,,,
llama-2-70b-chat,0.78,9.7,13.0,8.0,21.3,0.0,2.0,5.6,19.0,13.2,3.0,2.6,0.0,30.0,19.6,7.8,0.0,8.1,1.7,53.6,13.1,11.6,3.3,48.3,0.0,38.6,0.0,23.8,4.5
llama-2-13b-chat,0.77,4.2,11.7,3.6,26.4,0.0,6.0,25.3,13.0,7.8,0.0,1.1,0.0,18.1,6.2,3.2,0.0,4.1,0.0,63.5,10.8,7.9,2.0,35.1,0.0,29.3,0.0,18.9,2.1
vicuna-33b-v1.3,0.73,15.3,11.0,1.2,16.3,1.0,6.0,23.9,7.0,,,,,,,,,,,,,,,,,,,,
openchat-13b-v3.2,0.7,15.3,12.3,5.5,0.1,0.0,0.0,46.9,15.0,,,,,,,,,,,,,,,,,,,,
wizardlm-13b-v1.2,0.66,9.0,12.7,1.7,1.9,0.0,10.0,43.7,12.0,,,,,,,,,,,,,,,,,,,,
codellama-13b-instruct,0.56,3.5,9.7,10.4,0.0,0.0,0.0,43.8,14.0,13.4,2.2,9.6,2.2,22.2,17.0,0.0,0.0,9.3,1.7,65.5,25.9,17.7,3.7,52.5,25.0,41.8,12.5,25.8,10.0
vicuna-7b-v1.5,0.56,9.7,8.7,2.5,0.3,6.4,0.0,2.2,9.0,,,,,,,,,,,,,,,,,,,,
guanaco-65b,0.54,8.3,14.7,1.9,0.1,1.5,12.0,0.9,10.0,,,,,,,,,,,,,,,,,,,,
codellama-7b-instruct,0.5,4.9,12.7,8.2,0.0,0.0,2.0,25.2,12.0,,,,,,,,,,,,,,,,,,,,
wizardlm-30b-v1.0,0.46,13.9,12.7,2.9,0.3,1.8,6.0,4.4,1.0,,,,,,,,,,,,,,,,,,,,
guanaco-33b,0.39,11.1,9.3,3.2,0.3,0.0,6.0,6.2,5.0,,,,,,,,,,,,,,,,,,,,
koala-13b,0.34,3.5,5.0,0.4,0.1,4.4,0.0,3.9,7.0,,,,,,,,,,,,,,,,,,,,
llama-2-7b-chat,0.34,4.2,8.0,2.1,6.9,0.0,0.0,11.6,7.0,,,,,,,,,,,,,,,,,,,,
dolly-v2-12b,0.14,0.0,0.0,0.0,0.1,1.2,0.0,0.4,9.0,,,,,,,,,,,,,,,,,,,,
oasst-sft-4-pythia-12b-epoch-3.5,0.03,1.4,0.0,0.0,0.0,0.0,0.0,0.3,1.0,,,,,,,,,,,,,,,,,,,,
gpt-4-0314,,,,,,,,,,65.5,43.3,78.8,52.2,70.7,56.2,52.4,35.0,81.2,61.7,76.5,39.0,39.4,15.1,85.1,68.3,80.8,60.0,70.0,47.9
deepseek-llm-67b-chat,,,,,,,,,,34.5,20.9,36.1,10.0,31.7,22.3,13.7,0.0,22.0,6.7,72.7,31.9,23.9,5.7,71.4,40.0,40.5,17.5,38.5,17.2
lemur-70b-chat-v1,,,,,,,,,,10.8,0.7,33.4,5.6,19.4,9.8,10.1,0.0,9.7,3.3,71.8,11.6,12.2,3.3,72.0,28.3,37.7,12.5,30.8,8.3
mistral-7b-instruct-v0.1,,,,,,,,,,9.8,0.0,15.8,2.2,20.1,14.3,11.0,0.0,4.7,0.0,68.2,13.9,13.2,1.3,51.0,3.3,27.2,0.0,24.6,3.9
vicuna-13b-16k,,,,,,,,,,11.0,1.5,14.1,2.2,14.3,5.4,15.2,0.0,7.2,1.7,73.3,21.9,11.3,2.9,34.3,3.3,26.9,0.0,23.1,4.3
