[
  {
    "Model":"meta-llama\/Llama-3.3-70B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":70.6,
    "node_precision":28.763,
    "node_recall":57.27,
    "node_f1":38.2937,
    "edge_precision":24.0084,
    "edge_recall":38.9831,
    "edge_f1":29.7158,
    "action_precision":11.9556,
    "action_recall":86.4198,
    "action_f1":21.0053,
    "all_precision":19.302,
    "all_recall":56.4232,
    "all_f1":28.764
  },
  {
    "Model":"01-ai\/Yi-Coder-1.5B-Chat",
    "Model Family":"Yi",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":1.5,
    "node_precision":26.7241,
    "node_recall":10.8014,
    "node_f1":15.3846,
    "edge_precision":3.4483,
    "edge_recall":0.4132,
    "edge_f1":0.738,
    "action_precision":6.7729,
    "action_recall":11.7241,
    "action_f1":8.5859,
    "all_precision":12.3737,
    "all_recall":7.27,
    "all_f1":9.1589
  },
  {
    "Model":"Qwen\/Qwen1.5-1.8B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":1.8,
    "node_precision":13.0435,
    "node_recall":4.7809,
    "node_f1":6.9971,
    "edge_precision":0.0,
    "edge_recall":0.0,
    "edge_f1":0.0,
    "action_precision":1.5198,
    "action_recall":5.4945,
    "action_f1":2.381,
    "all_precision":3.6638,
    "all_recall":3.1136,
    "all_f1":3.3663
  },
  {
    "Model":"mistralai\/Mixtral-8x7B-Instruct-v0.1",
    "Model Family":"Mistral",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":46.7,
    "node_precision":19.7674,
    "node_recall":50.4451,
    "node_f1":28.4043,
    "edge_precision":15.8363,
    "edge_recall":30.1695,
    "edge_f1":20.7701,
    "action_precision":8.8417,
    "action_recall":81.1321,
    "action_f1":15.9456,
    "all_precision":13.4675,
    "all_recall":49.0518,
    "all_f1":21.1329
  },
  {
    "Model":"google\/gemma-3-12b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":12.2,
    "node_precision":24.7326,
    "node_recall":54.4118,
    "node_f1":34.0074,
    "edge_precision":57.1429,
    "edge_recall":1.3423,
    "edge_f1":2.623,
    "action_precision":14.3139,
    "action_recall":89.5062,
    "action_f1":24.6809,
    "all_precision":18.8914,
    "all_recall":41.75,
    "all_f1":26.0125
  },
  {
    "Model":"google\/gemma-3-4b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":4.3,
    "node_precision":22.0896,
    "node_recall":43.6578,
    "node_f1":29.336,
    "edge_precision":9.3168,
    "edge_recall":10.0671,
    "edge_f1":9.6774,
    "action_precision":10.5628,
    "action_recall":84.5679,
    "action_f1":18.78,
    "all_precision":13.7615,
    "all_recall":39.4243,
    "all_f1":20.4016
  },
  {
    "Model":"google\/gemma-7b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":8.5,
    "node_precision":21.2581,
    "node_recall":28.8235,
    "node_f1":24.4694,
    "edge_precision":13.8462,
    "edge_recall":15.1007,
    "edge_f1":14.4462,
    "action_precision":9.5335,
    "action_recall":58.0247,
    "action_f1":16.3763,
    "all_precision":13.3747,
    "all_recall":29.625,
    "all_f1":18.4292
  },
  {
    "Model":"01-ai\/Yi-1.5-6B-Chat",
    "Model Family":"Yi",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":6.1,
    "node_precision":16.1212,
    "node_recall":39.5833,
    "node_f1":22.9113,
    "edge_precision":2.9268,
    "edge_recall":8.4211,
    "edge_f1":4.3439,
    "action_precision":11.1018,
    "action_recall":84.0764,
    "action_f1":19.6137,
    "all_precision":10.1976,
    "all_recall":37.1465,
    "all_f1":16.0022
  },
  {
    "Model":"Qwen\/Qwen1.5-7B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":7.7,
    "node_precision":14.986,
    "node_recall":32.8221,
    "node_f1":20.5769,
    "edge_precision":3.0612,
    "edge_recall":7.2917,
    "edge_f1":4.3121,
    "action_precision":3.5211,
    "action_recall":9.4937,
    "action_f1":5.137,
    "all_precision":7.8313,
    "all_recall":18.5233,
    "all_f1":11.0085
  },
  {
    "Model":"Qwen\/Qwen1.5-14B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":14.2,
    "node_precision":15.7395,
    "node_recall":35.5828,
    "node_f1":21.825,
    "edge_precision":5.1672,
    "edge_recall":5.9233,
    "edge_f1":5.5195,
    "action_precision":5.1546,
    "action_recall":30.4054,
    "action_f1":8.8149,
    "all_precision":9.18,
    "all_recall":23.3903,
    "all_f1":13.1852
  },
  {
    "Model":"google\/gemma-2b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":2.5,
    "node_precision":4.6429,
    "node_recall":11.8541,
    "node_f1":6.6724,
    "edge_precision":5.2632,
    "edge_recall":4.5296,
    "edge_f1":4.8689,
    "action_precision":4.7041,
    "action_recall":20.8054,
    "action_f1":7.6733,
    "all_precision":4.7537,
    "all_recall":10.8497,
    "all_f1":6.6109
  },
  {
    "Model":"Qwen\/Qwen3-14B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":14.8,
    "node_precision":22.9167,
    "node_recall":19.4118,
    "node_f1":21.0191,
    "edge_precision":24.7368,
    "edge_recall":15.7718,
    "edge_f1":19.2623,
    "action_precision":11.7962,
    "action_recall":27.1605,
    "action_f1":16.4486,
    "all_precision":18.4489,
    "all_recall":19.625,
    "all_f1":19.0188
  },
  {
    "Model":"Qwen\/Qwen1.5-72B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":72.3,
    "node_precision":17.8723,
    "node_recall":40.0,
    "node_f1":24.7059,
    "edge_precision":5.5556,
    "edge_recall":5.6391,
    "edge_f1":5.597,
    "action_precision":6.8256,
    "action_recall":44.3662,
    "action_f1":11.831,
    "all_precision":10.7482,
    "all_recall":28.2158,
    "all_f1":15.5666
  },
  {
    "Model":"openai\/gpt-oss-120b",
    "Model Family":"GPT-OSS",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":120.4,
    "node_precision":33.7815,
    "node_recall":59.6439,
    "node_f1":43.133,
    "edge_precision":49.8728,
    "edge_recall":66.2162,
    "edge_f1":56.894,
    "action_precision":15.5844,
    "action_recall":75.9494,
    "action_f1":25.8621,
    "all_precision":29.4084,
    "all_recall":65.3603,
    "all_f1":40.5649
  },
  {
    "Model":"Qwen\/Qwen1.5-4B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":4.0,
    "node_precision":16.3895,
    "node_recall":21.6301,
    "node_f1":18.6486,
    "edge_precision":1.3201,
    "edge_recall":1.444,
    "edge_f1":1.3793,
    "action_precision":3.8647,
    "action_recall":10.5263,
    "action_f1":5.6537,
    "all_precision":7.8207,
    "all_recall":11.8984,
    "all_f1":9.438
  },
  {
    "Model":"meta-llama\/Llama-3.1-70B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":70.6,
    "node_precision":24.3176,
    "node_recall":57.6471,
    "node_f1":34.2059,
    "edge_precision":23.2394,
    "edge_recall":44.2953,
    "edge_f1":30.485,
    "action_precision":11.874,
    "action_recall":90.7407,
    "action_f1":21.0,
    "all_precision":18.1853,
    "all_recall":59.375,
    "all_f1":27.8429
  },
  {
    "Model":"Qwen\/Qwen-72B",
    "Model Family":"Qwen",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":72.3,
    "node_precision":0.0,
    "node_recall":0.0,
    "node_f1":0.0,
    "edge_precision":0.0,
    "edge_recall":0.0,
    "edge_f1":0.0,
    "action_precision":0.0,
    "action_recall":0.0,
    "action_f1":0.0,
    "all_precision":0.0,
    "all_recall":0.0,
    "all_f1":0.0
  },
  {
    "Model":"Qwen\/Qwen3-32B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":32.8,
    "node_precision":26.3328,
    "node_recall":48.368,
    "node_f1":34.1004,
    "edge_precision":26.1954,
    "edge_recall":42.4242,
    "edge_f1":32.3907,
    "action_precision":12.6173,
    "action_recall":75.625,
    "action_f1":21.6265,
    "all_precision":19.9126,
    "all_recall":51.6373,
    "all_f1":28.7417
  },
  {
    "Model":"Qwen\/Qwen3-8B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":8.2,
    "node_precision":22.2607,
    "node_recall":56.7647,
    "node_f1":31.9801,
    "edge_precision":22.5632,
    "edge_recall":41.9463,
    "edge_f1":29.3427,
    "action_precision":8.1855,
    "action_recall":74.0741,
    "action_f1":14.742,
    "all_precision":15.1715,
    "all_recall":54.75,
    "all_f1":23.7592
  },
  {
    "Model":"google\/gemma-2-27b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":27.2,
    "node_precision":33.0784,
    "node_recall":51.6418,
    "node_f1":40.3263,
    "edge_precision":20.922,
    "edge_recall":20.1365,
    "edge_f1":20.5217,
    "action_precision":12.1212,
    "action_recall":90.566,
    "action_f1":21.3808,
    "all_precision":18.866,
    "all_recall":47.7764,
    "all_f1":27.0504
  },
  {
    "Model":"google\/gemma-1.1-2b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":2.5,
    "node_precision":12.3552,
    "node_recall":20.3822,
    "node_f1":15.3846,
    "edge_precision":1.3575,
    "edge_recall":1.1765,
    "edge_f1":1.2605,
    "action_precision":5.2567,
    "action_recall":28.1046,
    "action_f1":8.8568,
    "all_precision":7.0649,
    "all_recall":15.2355,
    "all_f1":9.6534
  },
  {
    "Model":"meta-llama\/Llama-4-Scout-17B-16E-Instruct",
    "Model Family":"Llama",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":108.6,
    "node_precision":22.9581,
    "node_recall":61.1765,
    "node_f1":33.3868,
    "edge_precision":21.0526,
    "edge_recall":49.6644,
    "edge_f1":29.5704,
    "action_precision":10.3163,
    "action_recall":84.5679,
    "action_f1":18.3893,
    "all_precision":16.7858,
    "all_recall":61.625,
    "all_f1":26.3848
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1-Distill-Llama-70B",
    "Model Family":"DeepSeek",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":70.6,
    "node_precision":30.6569,
    "node_recall":15.6134,
    "node_f1":20.6897,
    "edge_precision":15.0,
    "edge_recall":3.7344,
    "edge_f1":5.9801,
    "action_precision":10.1852,
    "action_recall":16.6667,
    "action_f1":12.6437,
    "all_precision":17.6755,
    "all_recall":11.3707,
    "all_f1":13.8389
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1",
    "Model Family":"DeepSeek",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":684.5,
    "node_precision":25.8209,
    "node_recall":51.952,
    "node_f1":34.4965,
    "edge_precision":28.6334,
    "edge_recall":45.0512,
    "edge_f1":35.0133,
    "action_precision":16.0839,
    "action_recall":86.25,
    "action_f1":27.112,
    "all_precision":22.2725,
    "all_recall":56.3613,
    "all_f1":31.9279
  },
  {
    "Model":"google\/gemma-2-2b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":2.6,
    "node_precision":18.541,
    "node_recall":39.1026,
    "node_f1":25.1546,
    "edge_precision":3.5714,
    "edge_recall":5.7143,
    "edge_f1":4.3956,
    "action_precision":7.0284,
    "action_recall":36.5385,
    "action_f1":11.789,
    "all_precision":10.1721,
    "all_recall":26.0695,
    "all_f1":14.6341
  },
  {
    "Model":"Qwen\/Qwen3-0.6B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":0.8,
    "node_precision":18.4516,
    "node_recall":42.4332,
    "node_f1":25.7194,
    "edge_precision":2.1841,
    "edge_recall":4.7458,
    "edge_f1":2.9915,
    "action_precision":8.1967,
    "action_recall":44.0252,
    "action_f1":13.8203,
    "all_precision":10.0,
    "all_recall":28.6979,
    "all_f1":14.8318
  },
  {
    "Model":"google\/gemma-1.1-7b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":8.5,
    "node_precision":13.2743,
    "node_recall":4.4379,
    "node_f1":6.6519,
    "edge_precision":6.25,
    "edge_recall":0.6734,
    "edge_f1":1.2158,
    "action_precision":11.5385,
    "action_recall":14.9068,
    "action_f1":13.0081,
    "all_precision":11.6147,
    "all_recall":5.1508,
    "all_f1":7.1366
  },
  {
    "Model":"meta-llama\/Llama-3.2-1B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":1.2,
    "node_precision":17.2043,
    "node_recall":6.1776,
    "node_f1":9.0909,
    "edge_precision":0.8734,
    "edge_recall":1.0,
    "edge_f1":0.9324,
    "action_precision":5.4922,
    "action_recall":42.7419,
    "action_f1":9.7337,
    "all_precision":5.5167,
    "all_recall":12.1784,
    "all_f1":7.5936
  },
  {
    "Model":"deepseek-ai\/DeepSeek-V3",
    "Model Family":"DeepSeek",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":684.5,
    "node_precision":21.966,
    "node_recall":59.5395,
    "node_f1":32.0922,
    "edge_precision":32.13,
    "edge_recall":62.6761,
    "edge_f1":42.4821,
    "action_precision":13.7021,
    "action_recall":99.3827,
    "action_f1":24.0838,
    "all_precision":20.3682,
    "all_recall":69.3333,
    "all_f1":31.4865
  },
  {
    "Model":"01-ai\/Yi-1.5-34B-Chat",
    "Model Family":"Yi",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":34.4,
    "node_precision":24.3354,
    "node_recall":35.3116,
    "node_f1":28.8136,
    "edge_precision":18.3607,
    "edge_recall":18.9189,
    "edge_f1":18.6356,
    "action_precision":9.6277,
    "action_recall":47.4684,
    "action_f1":16.0085,
    "all_precision":15.8932,
    "all_recall":31.6056,
    "all_f1":21.1506
  },
  {
    "Model":"meta-llama\/Meta-Llama-3-70B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":70.6,
    "node_precision":20.7831,
    "node_recall":60.8824,
    "node_f1":30.988,
    "edge_precision":20.2194,
    "edge_recall":43.2886,
    "edge_f1":27.5641,
    "action_precision":10.429,
    "action_recall":87.037,
    "action_f1":18.6262,
    "all_precision":15.9745,
    "all_recall":59.625,
    "all_f1":25.1981
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1-Distill-Llama-8B",
    "Model Family":"DeepSeek",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":8.0,
    "node_precision":22.6131,
    "node_recall":39.9408,
    "node_f1":28.877,
    "edge_precision":6.4748,
    "edge_recall":9.1837,
    "edge_f1":7.5949,
    "action_precision":14.3293,
    "action_recall":58.3851,
    "action_f1":23.011,
    "all_precision":15.3293,
    "all_recall":32.2825,
    "all_f1":20.7877
  },
  {
    "Model":"LGAI-EXAONE\/EXAONE-3.5-32B-Instruct",
    "Model Family":"Exaone",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":32.0,
    "node_precision":18.0952,
    "node_recall":59.19,
    "node_f1":27.717,
    "edge_precision":12.6829,
    "edge_recall":28.0576,
    "edge_f1":17.4692,
    "action_precision":10.1879,
    "action_recall":92.1569,
    "action_f1":18.3474,
    "all_precision":13.4142,
    "all_recall":54.3883,
    "all_f1":21.5207
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1-Distill-Qwen-1.5B",
    "Model Family":"DeepSeek",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":1.8,
    "node_precision":7.3969,
    "node_recall":19.403,
    "node_f1":10.7106,
    "edge_precision":1.0225,
    "edge_recall":2.2321,
    "edge_f1":1.4025,
    "action_precision":6.5141,
    "action_recall":30.0813,
    "action_f1":10.7091,
    "all_precision":5.3409,
    "all_recall":15.2846,
    "all_f1":7.9158
  },
  {
    "Model":"openai\/gpt-oss-20b",
    "Model Family":"GPT-OSS",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":21.5,
    "node_precision":33.1976,
    "node_recall":56.0137,
    "node_f1":41.688,
    "edge_precision":54.9763,
    "edge_recall":46.5863,
    "edge_f1":50.4348,
    "action_precision":13.5431,
    "action_recall":71.7391,
    "action_f1":22.7848,
    "all_precision":26.3782,
    "all_recall":55.7522,
    "all_f1":35.8124
  },
  {
    "Model":"meta-llama\/Llama-4-Maverick-17B-128E-Instruct-FP8",
    "Model Family":"Llama",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":401.6,
    "node_precision":35.1301,
    "node_recall":56.9277,
    "node_f1":43.4483,
    "edge_precision":27.1144,
    "edge_recall":37.0748,
    "edge_f1":31.3218,
    "action_precision":24.0876,
    "action_recall":61.4907,
    "action_f1":34.6154,
    "all_precision":29.3856,
    "all_recall":50.4447,
    "all_f1":37.1375
  },
  {
    "Model":"meta-llama\/Meta-Llama-3-8B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":8.0,
    "node_precision":19.9408,
    "node_recall":59.4118,
    "node_f1":29.8596,
    "edge_precision":13.7214,
    "edge_recall":22.1477,
    "edge_f1":16.9448,
    "action_precision":10.6227,
    "action_recall":71.6049,
    "action_f1":18.5008,
    "all_precision":14.8492,
    "all_recall":48.0,
    "all_f1":22.6816
  },
  {
    "Model":"Qwen\/Qwen3-1.7B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":2.0,
    "node_precision":23.7082,
    "node_recall":46.4286,
    "node_f1":31.3883,
    "edge_precision":7.6487,
    "edge_recall":9.1525,
    "edge_f1":8.3333,
    "action_precision":8.1818,
    "action_recall":45.0,
    "action_f1":13.8462,
    "all_precision":13.4849,
    "all_recall":32.2377,
    "all_f1":19.0157
  },
  {
    "Model":"meta-llama\/Llama-3.1-8B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":8.0,
    "node_precision":18.1646,
    "node_recall":56.4706,
    "node_f1":27.4875,
    "edge_precision":12.9293,
    "edge_recall":21.5488,
    "edge_f1":16.1616,
    "action_precision":11.4099,
    "action_recall":86.9565,
    "action_f1":20.1729,
    "all_precision":14.2497,
    "all_recall":49.6241,
    "all_f1":22.1415
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1-Distill-Qwen-14B",
    "Model Family":"DeepSeek",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":14.8,
    "node_precision":18.1382,
    "node_recall":55.5882,
    "node_f1":27.3517,
    "edge_precision":7.8704,
    "edge_recall":17.2881,
    "edge_f1":10.8165,
    "action_precision":9.1111,
    "action_recall":76.875,
    "action_f1":16.2914,
    "all_precision":11.9408,
    "all_recall":45.6604,
    "all_f1":18.9309
  },
  {
    "Model":"mistralai\/Mistral-7B-Instruct-v0.2",
    "Model Family":"Mistral",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":7.2,
    "node_precision":12.806,
    "node_recall":46.4164,
    "node_f1":20.0738,
    "edge_precision":7.6164,
    "edge_recall":18.4932,
    "edge_f1":10.7892,
    "action_precision":7.377,
    "action_recall":74.0506,
    "action_f1":13.4174,
    "all_precision":9.1451,
    "all_recall":41.319,
    "all_f1":14.9756
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1-Distill-Qwen-32B",
    "Model Family":"DeepSeek",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":32.8,
    "node_precision":25.2778,
    "node_recall":54.1667,
    "node_f1":34.4697,
    "edge_precision":33.7621,
    "edge_recall":35.9589,
    "edge_f1":34.8259,
    "action_precision":12.8599,
    "action_recall":83.75,
    "action_f1":22.2962,
    "all_precision":20.3087,
    "all_recall":53.4264,
    "all_f1":29.4303
  },
  {
    "Model":"moonshotai\/Kimi-K2-Instruct",
    "Model Family":"Kimi",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":1000.0,
    "node_precision":40.3226,
    "node_recall":15.015,
    "node_f1":21.8818,
    "edge_precision":20.7358,
    "edge_recall":21.2329,
    "edge_f1":20.9814,
    "action_precision":12.9477,
    "action_recall":58.75,
    "action_f1":21.219,
    "all_precision":17.9286,
    "all_recall":26.242,
    "all_f1":21.303
  },
  {
    "Model":"google\/gemma-2-9b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":9.2,
    "node_precision":19.5329,
    "node_recall":54.5994,
    "node_f1":28.7725,
    "edge_precision":17.8571,
    "edge_recall":1.7007,
    "edge_f1":3.1056,
    "action_precision":8.8907,
    "action_recall":67.7019,
    "action_f1":15.7174,
    "all_precision":13.5701,
    "all_recall":37.6263,
    "all_f1":19.9465
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1-Distill-Qwen-7B",
    "Model Family":"DeepSeek",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":7.6,
    "node_precision":18.2137,
    "node_recall":31.8043,
    "node_f1":23.1626,
    "edge_precision":8.3333,
    "edge_recall":4.947,
    "edge_f1":6.2084,
    "action_precision":9.067,
    "action_recall":45.098,
    "action_f1":15.0985,
    "all_precision":12.4667,
    "all_recall":24.5085,
    "all_f1":16.5267
  },
  {
    "Model":"LGAI-EXAONE\/EXAONE-Deep-32B",
    "Model Family":"Exaone",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":32.0,
    "node_precision":0.0,
    "node_recall":0.0,
    "node_f1":0.0,
    "edge_precision":0.0,
    "edge_recall":0.0,
    "edge_f1":0.0,
    "action_precision":0.0,
    "action_recall":0.0,
    "action_f1":0.0,
    "all_precision":0.0,
    "all_recall":0.0,
    "all_f1":0.0
  },
  {
    "Model":"Qwen\/Qwen-7B",
    "Model Family":"Qwen",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":7.7,
    "node_precision":0.0,
    "node_recall":0.0,
    "node_f1":0.0,
    "edge_precision":0.0,
    "edge_recall":0.0,
    "edge_f1":0.0,
    "action_precision":0.0,
    "action_recall":0.0,
    "action_f1":0.0,
    "all_precision":0.0,
    "all_recall":0.0,
    "all_f1":0.0
  },
  {
    "Model":"google\/gemma-3-27b-it",
    "Model Family":"Gemma",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":27.4,
    "node_precision":38.0952,
    "node_recall":54.1176,
    "node_f1":44.7145,
    "edge_precision":29.0598,
    "edge_recall":11.4094,
    "edge_f1":16.3855,
    "action_precision":16.3392,
    "action_recall":97.5309,
    "action_f1":27.9894,
    "all_precision":23.9949,
    "all_recall":47.0,
    "all_f1":31.7702
  },
  {
    "Model":"Qwen\/Qwen3-4B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":4.0,
    "node_precision":24.3466,
    "node_recall":52.2124,
    "node_f1":33.2083,
    "edge_precision":12.4098,
    "edge_recall":28.8591,
    "edge_f1":17.3562,
    "action_precision":8.1377,
    "action_recall":64.5963,
    "action_f1":14.4545,
    "all_precision":13.6027,
    "all_recall":45.99,
    "all_f1":20.9954
  },
  {
    "Model":"01-ai\/Yi-Coder-9B-Chat",
    "Model Family":"Yi",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":8.8,
    "node_precision":19.2628,
    "node_recall":49.0909,
    "node_f1":27.6687,
    "edge_precision":15.7989,
    "edge_recall":30.8772,
    "edge_f1":20.9026,
    "action_precision":10.7081,
    "action_recall":77.5,
    "action_f1":18.8164,
    "all_precision":14.6322,
    "all_recall":48.2581,
    "all_f1":22.4557
  },
  {
    "Model":"meta-llama\/Llama-3.2-3B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"goal_interpretation_v4",
    "Model Size (B)":3.2,
    "node_precision":17.0268,
    "node_recall":58.806,
    "node_f1":26.4075,
    "edge_precision":3.3606,
    "edge_recall":12.585,
    "edge_f1":5.3047,
    "action_precision":9.7623,
    "action_recall":73.7179,
    "action_f1":17.2414,
    "all_precision":10.1572,
    "all_recall":44.4586,
    "all_f1":16.5364
  }
]