Model,Model Family,dataset,eval_type,Model Size (B),node_precision,node_recall,node_f1,edge_precision,edge_recall,edge_f1,action_precision,action_recall,action_f1,all_precision,all_recall,all_f1,Pretraining Data Size (T),FLOPs (1E21),Average,BBH,MATH Lvl 5,GPQA,MUSR,MMLU-PRO,IFEval
meta-llama/Llama-3.3-70B-Instruct,Llama-3,virtualhome,goal_interpretation,70.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,6353.999999999999,44.84747145129876,56.561410788022194,48.338368580060425,10.514541387024611,15.565624999999999,48.12906323877069,89.97581971391463
01-ai/Yi-Coder-1.5B-Chat,Yi,virtualhome,goal_interpretation,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.4,21.6,,,,,,,
Qwen/Qwen1.5-1.8B,Qwen1.5,virtualhome,goal_interpretation,1.8,5.5556,3.4483,4.2553,2.5,5.2632,3.3898,5.3435,53.8462,9.7222,4.7619,14.7541,7.2,2.4,25.92,9.269492522098927,9.759901587727937,3.1722054380664653,7.38255033557047,3.963802083333334,9.79609929078014,21.542396397115212
mistralai/Mixtral-8x7B-Instruct-v0.1,Mistral,virtualhome,goal_interpretation,46.7,20.1717,45.6311,27.9762,17.1306,31.3725,22.1607,8.2171,79.1045,14.8876,13.3143,46.8481,20.7356,,,23.8171027058463,29.742398380967334,9.138972809667674,7.046979865771815,11.073697916666667,29.909131205673756,55.991436056330535
google/gemma-3-12b-it,Gemma,virtualhome,goal_interpretation,12.2,23.0961,54.4118,32.4277,35.4839,3.6913,6.6869,13.8235,87.037,23.8579,18.1965,42.125,25.4148,12.0,878.4,,,,,,,
google/gemma-3-4b-it,Gemma,virtualhome,goal_interpretation,4.3,22.1889,43.5294,29.3942,5.5556,6.3758,5.9375,11.0201,91.358,19.6678,13.3929,39.375,19.9873,4.0,103.2,,,,,,,
ibm-granite/granite-3.2-2b-instruct,Granite,virtualhome,goal_interpretation,2.5,14.9296,32.5153,20.4633,0.5917,1.7241,0.8811,7.281,83.1169,13.3891,7.214,31.039,11.7071,12.0,180.0,21.25014812377563,21.668268416036614,14.425981873111782,5.369127516778524,4.704947916666668,19.815676713947987,61.51688630611223
ibm-granite/granite-3.1-8b-instruct,Granite,virtualhome,goal_interpretation,8.2,23.2384,45.9941,30.8765,6.6202,12.8814,8.7457,7.6833,82.9114,14.0633,10.998,41.0127,17.3448,12.0,590.4,30.6030430081627,34.089655299414055,21.978851963746223,8.277404921700223,19.00520833333333,28.191489361702125,72.07564816908027
google/gemma-7b-it,Gemma,virtualhome,goal_interpretation,8.5,18.5185,71.4286,29.4118,12.0,23.0769,15.7895,3.7037,40.0,6.7797,9.434,40.0,15.2672,2.0,102.0,13.067087110466217,11.940832085290182,2.9456193353474323,4.5861297539149914,12.528385416666667,7.7183067375886525,38.68324933398937
01-ai/Yi-1.5-6B-Chat,Yi,virtualhome,goal_interpretation,6.1,19.883,45.6376,27.6986,1.7115,5.6,2.6217,13.8889,101.6949,24.4399,11.4117,40.5405,17.81,3.6,131.76,22.784006289829847,23.67872313235784,16.238670694864048,6.935123042505594,14.030468750000002,24.368351063829788,51.452701055421834
Qwen/Qwen1.5-14B,Qwen1.5,virtualhome,goal_interpretation,14.2,19.9029,53.7118,29.0437,5.4217,12.6761,7.5949,7.9023,54.4554,13.8018,11.3135,37.7532,17.4098,4.0,336.0,20.854080062460586,30.063103282917453,20.241691842900302,5.92841163310962,10.464062500000002,29.373522458628837,29.05368865720732
google/gemma-2b-it,Gemma,virtualhome,goal_interpretation,2.5,5.5398,16.7382,8.3244,7.3579,11.4583,8.9613,6.2724,38.8889,10.8025,6.1499,18.6408,9.2486,6.0,90.0,7.485804130315127,5.214303022163619,2.0392749244712993,3.8031319910514525,3.0322916666666675,3.9228723404255303,26.902950837112197
Qwen/Qwen3-14B,Qwen3,virtualhome,goal_interpretation,14.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,3196.8,,,,,,,
google/gemma-2-9b,Gemma,virtualhome,goal_interpretation,9.2,100.0,100.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,33.3333,100.0,50.0,8.0,441.6,21.205286776100692,34.09681853589784,13.444108761329304,10.514541387024611,14.297656250000001,34.48027482269504,20.398320899657357
Qwen/Qwen1.5-72B,Qwen1.5,virtualhome,goal_interpretation,72.3,22.5931,55.873,32.1755,10.5516,16.7939,12.9602,9.5494,69.5312,16.7925,14.5207,43.8298,21.8143,3.0,1296.0,,,,,,,
openai/gpt-oss-120b,GPT-OSS,virtualhome,goal_interpretation,120.4,32.3529,58.7537,41.7281,52.2857,61.8243,56.6563,16.3373,77.0186,26.9565,29.3434,63.602,40.159,,,,,,,,,
Qwen/Qwen1.5-4B,Qwen1.5,virtualhome,goal_interpretation,4.0,25.3687,40.3756,31.1594,1.937,4.1451,2.6403,12.8028,34.5794,18.6869,12.5841,25.5361,16.8597,2.4,57.6,11.76818275851784,16.249142581095292,5.287009063444108,3.5794183445190177,4.8226562500000005,16.22340425531915,24.447466056729475
meta-llama/Llama-3.1-70B-Instruct,Llama-3,virtualhome,goal_interpretation,70.6,20.0,42.8571,27.2727,37.5,33.3333,35.2941,31.0345,100.0,47.3684,28.8462,60.0,38.961,15.0,6353.999999999999,43.409948245645786,55.92799173898473,38.066465256797585,14.205816554809845,17.691145833333334,47.87972813238771,86.6885419575615
Qwen/Qwen-72B,Qwen,virtualhome,goal_interpretation,72.3,21.8155,49.3377,30.2538,10.4895,17.0455,12.987,9.899,70.0,17.3451,13.8915,41.3598,20.7977,3.0,1296.0,,,,,,,
ibm-granite/granite-3.3-2b-base,Granite,virtualhome,goal_interpretation,2.5,16.0,28.8,20.5714,2.0305,4.1667,2.7304,7.231,73.2143,13.1621,8.1901,29.2419,12.7962,12.0,180.0,,,,,,,
Qwen/Qwen3-32B,Qwen3,virtualhome,goal_interpretation,32.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,7084.799999999999,,,,,,,
01-ai/Yi-1.5-9B,Yi,virtualhome,goal_interpretation,8.8,24.8322,44.5783,31.8966,10.0,17.8571,12.8205,10.8374,64.7059,18.5654,15.2655,39.8844,22.08,3.6,190.08000000000004,22.153901514184795,30.50071699492122,11.404833836858005,17.225950782997764,12.030989583333332,32.402482269503544,29.358435617494916
meta-llama/Llama-2-7b-hf,Llama-2,virtualhome,goal_interpretation,6.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,84.0,8.806357596540016,10.35141665784897,1.7371601208459215,2.2371364653243813,3.7578125,9.56523345153664,25.18938638368418
Qwen/Qwen3-8B,Qwen3,virtualhome,goal_interpretation,8.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,1771.1999999999998,,,,,,,
google/gemma-2-27b-it,Gemma,virtualhome,goal_interpretation,27.2,34.4322,55.6213,42.5339,23.0769,26.1745,24.5283,12.4478,91.9753,21.9279,19.9423,52.005,28.8295,13.0,2121.6,36.17428251510342,49.27284215130387,23.867069486404834,16.666666666666664,9.112760416666667,38.34958628841608,79.77677008116243
google/gemma-1.1-2b-it,Gemma,virtualhome,goal_interpretation,2.5,9.6296,18.0556,12.5604,0.0,0.0,0.0,5.4795,32.0,9.3567,6.0694,13.6364,8.4,3.0,45.0,8.053373854341979,5.862826722774347,1.812688821752266,2.572706935123044,2.024479166666666,5.372709810874704,30.674831668860847
meta-llama/Llama-4-Scout-17B-16E-Instruct,Llama,virtualhome,goal_interpretation,108.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,26064.0,,,,,,,
deepseek-ai/DeepSeek-R1-Distill-Llama-70B,DeepSeek,virtualhome,goal_interpretation,70.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,6353.999999999999,27.809426360756188,35.81986234433108,30.74018126888218,2.0134228187919474,13.277343749999998,41.64635047281324,43.35939750971866
google/gemma-2b,Gemma,virtualhome,goal_interpretation,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,72.0,7.321959810488082,8.246263426638125,3.0211480362537766,0.6711409395973182,7.555989583333336,4.061391843971631,20.375825033134305
ibm-granite/granite-3.1-2b-base,Granite,virtualhome,goal_interpretation,2.5,17.6471,23.0769,20.0,2.0408,2.5974,2.2857,6.3745,44.4444,11.1498,8.3333,19.1176,11.6071,12.0,180.0,13.202826259598206,16.843689846888516,5.664652567975831,3.6912751677852316,3.9049479166666674,13.89627659574468,35.216115462528315
meta-llama/Llama-3.2-3B,Llama-3,virtualhome,goal_interpretation,3.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,172.8,8.697822716562822,14.232664884364107,1.8882175226586102,2.348993288590602,3.8148437499999996,16.528147163120565,13.374069690643047
deepseek-ai/DeepSeek-R1,DeepSeek,virtualhome,goal_interpretation,684.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.8,60783.600000000006,,,,,,,
google/gemma-2-2b-it,Gemma,virtualhome,goal_interpretation,2.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,31.200000000000003,17.046939294966545,17.980792881523424,0.0755287009063444,3.243847874720355,7.077343750000001,17.22074468085106,56.68337788179808
Qwen/Qwen3-0.6B,Qwen3,virtualhome,goal_interpretation,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,172.8,,,,,,,
google/gemma-1.1-7b-it,Gemma,virtualhome,goal_interpretation,8.5,16.6667,50.0,25.0,0.0,0.0,0.0,28.5714,133.3333,47.0588,22.7273,83.3333,35.7143,6.0,306.0,17.693584228972615,15.93420938501317,4.909365558912387,5.8165548098433995,11.510937500000002,17.5993646572104,50.391073462856326
meta-llama/Llama-3.2-1B-Instruct,Llama-3,virtualhome,goal_interpretation,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,64.8,14.443126333711135,8.742521312303046,7.02416918429003,3.355704697986576,2.973437500000001,7.579787234042552,56.9831380736446
ibm-granite/granite-3.1-2b-instruct,Granite,virtualhome,goal_interpretation,2.5,14.5183,32.4242,20.0562,0.9756,2.7778,1.444,7.6509,91.6129,14.1223,7.53,33.2471,12.279,12.0,180.0,21.712212822028288,21.822956140794506,15.256797583081571,5.257270693512303,4.867708333333335,20.212765957446805,62.8557782240012
ibm-granite/granite-3.2-8b-instruct,Granite,virtualhome,goal_interpretation,8.2,23.5023,45.9459,31.0976,8.4428,15.411,10.9091,8.0024,84.9057,14.6262,11.5987,42.4745,18.2216,12.0,590.4,30.7704488980163,34.65536965519957,23.791540785498487,8.7248322147651,16.791406250000005,27.914450354609933,72.74509412802476
deepseek-ai/DeepSeek-V3,DeepSeek,virtualhome,goal_interpretation,684.5,22.9621,58.8235,33.0306,32.3843,61.0738,42.3256,13.04,100.6173,23.0878,20.3131,68.125,31.2949,14.8,60783.600000000006,,,,,,,
meta-llama/Meta-Llama-3-70B,Llama-3,virtualhome,goal_interpretation,70.6,30.7692,88.8889,45.7143,15.3846,50.0,23.5294,12.5,100.0,22.2222,20.6349,81.25,32.9114,15.0,6300.0,26.705350171613343,48.709812647505885,18.580060422960727,19.686800894854585,16.011197916666664,41.21232269503546,16.031906452656727
meta-llama/Llama-3.1-8B,Llama-3,virtualhome,goal_interpretation,8.0,17.9104,48.0,26.087,5.3571,15.0,7.8947,7.7922,50.0,13.4831,10.5,36.8421,16.3424,15.0,720.0,14.42086519266696,25.30447063475493,6.570996978851963,8.05369127516779,8.715104166666668,25.42109929078014,12.459828809780273
google/gemma-2-27b,Gemma,virtualhome,goal_interpretation,27.2,27.2727,50.0,35.2941,16.6667,40.0,23.5294,0.0,0.0,0.0,17.3913,44.4444,25.0,13.0,2121.6,23.926167340782822,37.390737454186464,16.61631419939577,13.422818791946312,13.921093749999997,37.4538268321513,24.75221301701707
ibm-granite/granite-3.1-8b-base,Granite,virtualhome,goal_interpretation,8.2,28.2759,41.8367,33.7449,3.0303,2.7027,2.8571,7.4169,82.8571,13.615,11.9601,34.7826,17.7998,12.0,590.4,20.05719991900457,26.01958867101177,9.441087613293051,9.507829977628639,8.36197916666667,24.802378841607563,42.21033524381973
01-ai/Yi-1.5-34B-Chat,Yi,virtualhome,goal_interpretation,34.4,22.0238,55.8491,31.5902,16.4811,37.0,22.8043,9.6267,93.3333,17.4533,14.9603,56.1404,23.625,3.6,743.04,33.35799367075618,44.262825981005655,27.719033232628398,15.324384787472036,13.058072916666665,39.11606087470449,60.66758423205982
01-ai/Yi-1.5-34B,Yi,virtualhome,goal_interpretation,34.4,27.5109,51.6393,35.8974,9.2105,11.0526,10.0478,12.012,86.0215,21.0804,16.7899,43.074,24.1618,3.6,743.04,25.64649419429311,42.74936268839652,15.332326283987916,15.436241610738257,11.217187500000003,40.732121749408975,28.411725333226947
LGAI-EXAONE/EXAONE-3.5-32B-Instruct,Exaone,virtualhome,goal_interpretation,32.0,24.3553,61.1511,34.8361,16.2562,36.2637,22.449,10.0656,97.8723,18.254,16.2537,59.2058,25.5054,6.5,1248.0,37.603165755662836,39.82420331711213,51.283987915407856,5.033557046979867,5.150000000000001,40.40890957446809,83.91833668000905
deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B,DeepSeek,virtualhome,goal_interpretation,1.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,194.4,10.351036796154286,4.729119207646243,16.91842900302115,0.7829977628635317,2.9656249999999993,2.0759456264775418,34.63410417691725
openai/gpt-oss-20b,GPT-OSS,virtualhome,goal_interpretation,21.5,35.4756,58.4746,44.16,51.0101,46.7593,48.7923,14.24,80.1802,24.1848,27.0627,58.2593,36.9577,,,,,,,,,
meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8,Llama,virtualhome,goal_interpretation,401.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.0,53011.2,,,,,,,
meta-llama/Meta-Llama-3-8B-Instruct,Llama-3,virtualhome,goal_interpretation,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,720.0,23.908735693936837,28.244949576343615,8.685800604229607,1.230425055928408,1.602864583333335,29.604388297872337,74.08398604591373
Qwen/Qwen3-1.7B,Qwen3,virtualhome,goal_interpretation,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,432.0,,,,,,,
meta-llama/Llama-2-13b-hf,Llama-2,virtualhome,goal_interpretation,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,156.0,11.065185981273997,17.222559825058127,1.5105740181268883,4.138702460850116,3.385416666666666,15.309175531914892,24.824687385027282
ibm-granite/granite-3.3-2b-instruct,Granite,virtualhome,goal_interpretation,2.5,16.8142,28.7009,21.2054,1.7937,1.3652,1.5504,7.3139,72.9032,13.2941,9.087,27.2144,13.6247,12.0,180.0,,,,,,,
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B,DeepSeek,virtualhome,goal_interpretation,14.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,1598.4,38.22146462032291,40.69076685552542,57.02416918429003,18.34451901565996,28.711458333333326,40.74135638297872,43.81651795015004
mistralai/Mistral-7B-Instruct-v0.2,Mistral,virtualhome,goal_interpretation,7.2,13.5309,48.6111,21.1694,9.8361,26.2136,14.3046,9.0435,78.7879,16.2246,10.6263,47.4729,17.3655,,,18.50789159273764,22.910601936713604,3.0211480362537766,3.467561521252797,7.608854166666667,19.076906028368796,54.96227786717022
deepseek-ai/DeepSeek-R1-Distill-Qwen-32B,DeepSeek,virtualhome,goal_interpretation,32.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,3542.3999999999996,22.96226839270608,17.149673765590364,17.069486404833835,4.5861297539149914,16.1421875,40.962987588652474,41.86314534324481
moonshotai/Kimi-K2-Instruct,Kimi,virtualhome,goal_interpretation,1000.0,34.8624,56.7164,43.1818,26.8797,48.3108,34.5411,13.9489,87.6543,24.0678,22.673,59.8991,32.8947,15.5,93000.0,,,,,,,
01-ai/Yi-Coder-1.5B,Yi,virtualhome,goal_interpretation,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.4,21.6,,,,,,,
google/gemma-2-9b-it,Gemma,virtualhome,goal_interpretation,9.2,47.619,52.6316,50.0,0.0,0.0,0.0,3.0303,33.3333,5.5556,20.3704,40.7407,27.1605,8.0,441.6,32.07276025267082,42.136619683664655,19.486404833836858,14.76510067114094,9.742187500000002,31.949985224586293,74.35626360279613
deepseek-ai/DeepSeek-R1-Distill-Qwen-7B,DeepSeek,virtualhome,goal_interpretation,7.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,820.8,14.99492256865316,7.882702983365756,19.561933534743204,3.9149888143176734,3.5518229166666675,14.681220449172578,40.3768667136531
LGAI-EXAONE/EXAONE-Deep-32B,Exaone,virtualhome,goal_interpretation,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.5,1248.0,,,,,,,
01-ai/Yi-1.5-6B,Yi,virtualhome,goal_interpretation,6.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.6,131.76,16.745698054972127,22.027904536694773,6.646525679758309,8.501118568232664,13.309114583333335,23.823507683215126,26.166017278598567
google/gemma-3-12b-pt,Gemma,virtualhome,goal_interpretation,12.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,878.4,,,,,,,
meta-llama/Meta-Llama-3-8B,Llama-3,virtualhome,goal_interpretation,8.0,16.6667,66.6667,26.6667,16.6667,9.0909,11.7647,0.0,0.0,0.0,10.8696,25.0,15.1515,15.0,720.0,13.626857071686075,24.50076379676797,4.531722054380665,7.38255033557047,6.242447916666666,24.553043735224584,14.550614591506093
google/gemma-3-27b-it,Gemma,virtualhome,goal_interpretation,27.4,33.3333,59.1176,42.6299,29.0,19.4631,23.2932,16.3,100.6173,28.0551,23.4054,52.75,32.4241,14.0,2301.6,,,,,,,
Qwen/Qwen3-4B,Qwen3,virtualhome,goal_interpretation,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,864.0,,,,,,,
Qwen/Qwen3-235B-A22B-Thinking-2507,Qwen3,virtualhome,goal_interpretation,235.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,50781.6,,,,,,,
ibm-granite/granite-3.3-8b-instruct,Granite,virtualhome,goal_interpretation,8.2,26.4,40.2439,31.8841,22.0339,22.807,22.4138,5.4237,51.6129,9.816,12.9436,36.4706,19.1063,12.0,590.4,,,,,,,
google/gemma-3-4b-pt,Gemma,virtualhome,goal_interpretation,4.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,103.2,,,,,,,
01-ai/Yi-Coder-9B-Chat,Yi,virtualhome,goal_interpretation,8.8,19.833,52.7778,28.8316,15.1007,29.8013,20.0445,11.463,83.5165,20.1592,15.0,51.1848,23.2009,2.4,126.72,16.985989314863886,25.94315294491389,4.003021148036254,0.0,7.963802083333333,15.83554964539007,48.17041006750976
meta-llama/Llama-3.1-70B,Llama-3,virtualhome,goal_interpretation,70.6,23.1638,56.1644,32.8,29.9065,50.0,37.4269,13.3333,66.6667,22.2222,20.2358,56.5934,29.8119,15.0,6353.999999999999,26.200215843375947,46.39941295581887,18.429003021148038,18.34451901565996,16.581770833333337,40.602836879432616,16.843752354862875
google/gemma-2-2b,Gemma,virtualhome,goal_interpretation,2.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,31.200000000000003,10.129463155055184,11.755807532236112,2.8700906344410875,1.6778523489932917,11.430468750000001,13.111332742316787,19.931226922343825
google/gemma-7b,Gemma,virtualhome,goal_interpretation,8.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,252.0,15.442818570272307,21.11609932329174,7.401812688821751,4.921700223713646,10.979947916666669,21.644134160756497,26.593217108383534
01-ai/Yi-Coder-9B,Yi,virtualhome,goal_interpretation,8.8,26.3889,51.3514,34.8624,0.0,0.0,0.0,13.9535,70.5882,23.301,17.5141,34.8315,23.3083,2.4,126.72,,,,,,,
ibm-granite/granite-3.3-8b-base,Granite,virtualhome,goal_interpretation,8.2,22.2989,45.3271,29.8921,4.386,5.0761,4.7059,6.3749,76.1364,11.7647,10.1517,34.8697,15.7253,12.0,590.4,,,,,,,
meta-llama/Llama-2-70b-hf,Llama-2,virtualhome,goal_interpretation,69.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,840.0,18.372598605703004,35.900061863721675,3.2477341389728096,7.046979865771815,9.777604166666668,30.1954048463357,24.06780675274937
meta-llama/Llama-3.2-3B-Instruct,Llama-3,virtualhome,goal_interpretation,3.2,17.9601,58.2734,27.4576,3.1674,11.9658,5.0089,9.7561,72.1311,17.1875,10.3423,43.8486,16.7369,9.0,172.8,24.204650807793456,24.059186446885473,17.673716012084594,3.8031319910514525,1.3734374999999996,24.386820330969268,73.93161256576994
