[
  {
    "Model":"meta-llama\/Llama-3.3-70B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":70.6,
    "task_success_rate":63.9344,
    "state_goal":54.3165,
    "relation_goal":76.1111,
    "action_goal":61.4865,
    "total_goal":62.5413,
    "execution_success_rate":68.9,
    "parsing_error":0.3279,
    "hallucination_error":16.0656,
    "predicate_argument_number_error":0.0,
    "wrong_order_error":0.0,
    "missing_step_error":14.7541,
    "affordance_error":0.0,
    "additional_step_error":1.9672
  },
  {
    "Model":"01-ai\/Yi-Coder-1.5B-Chat",
    "Model Family":"Yi",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":1.5,
    "task_success_rate":0.6557,
    "state_goal":8.2734,
    "relation_goal":0.5556,
    "action_goal":0.0,
    "total_goal":3.9604,
    "execution_success_rate":2.6,
    "parsing_error":10.4918,
    "hallucination_error":44.5902,
    "predicate_argument_number_error":14.4262,
    "wrong_order_error":0.0,
    "missing_step_error":22.9508,
    "affordance_error":6.5574,
    "additional_step_error":0.6557
  },
  {
    "Model":"Qwen\/Qwen1.5-1.8B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":1.8,
    "task_success_rate":0.0,
    "state_goal":0.0,
    "relation_goal":0.0,
    "action_goal":0.0,
    "total_goal":0.0,
    "execution_success_rate":0.0,
    "parsing_error":122.2951,
    "hallucination_error":18.3607,
    "predicate_argument_number_error":0.0,
    "wrong_order_error":0.0,
    "missing_step_error":0.0,
    "affordance_error":0.0,
    "additional_step_error":0.0
  },
  {
    "Model":"mistralai\/Mixtral-8x7B-Instruct-v0.1",
    "Model Family":"Mistral",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":46.7,
    "task_success_rate":29.1803,
    "state_goal":36.6906,
    "relation_goal":21.1111,
    "action_goal":16.8919,
    "total_goal":27.2277,
    "execution_success_rate":26.2,
    "parsing_error":2.623,
    "hallucination_error":22.9508,
    "predicate_argument_number_error":2.9508,
    "wrong_order_error":0.9836,
    "missing_step_error":42.623,
    "affordance_error":1.9672,
    "additional_step_error":1.3115
  },
  {
    "Model":"01-ai\/Yi-1.5-6B-Chat",
    "Model Family":"Yi",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":6.1,
    "task_success_rate":13.9456,
    "state_goal":22.3827,
    "relation_goal":14.7059,
    "action_goal":5.4054,
    "total_goal":15.9664,
    "execution_success_rate":16.0,
    "parsing_error":10.2041,
    "hallucination_error":31.6327,
    "predicate_argument_number_error":4.0816,
    "wrong_order_error":0.0,
    "missing_step_error":36.0544,
    "affordance_error":4.7619,
    "additional_step_error":1.7007
  },
  {
    "Model":"Qwen\/Qwen1.5-7B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":7.7,
    "task_success_rate":0.9836,
    "state_goal":4.3165,
    "relation_goal":1.6667,
    "action_goal":0.0,
    "total_goal":2.4752,
    "execution_success_rate":2.3,
    "parsing_error":67.8689,
    "hallucination_error":25.2459,
    "predicate_argument_number_error":2.2951,
    "wrong_order_error":0.0,
    "missing_step_error":11.1475,
    "affordance_error":5.5738,
    "additional_step_error":0.0
  },
  {
    "Model":"Qwen\/Qwen1.5-14B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":14.2,
    "task_success_rate":7.541,
    "state_goal":8.9928,
    "relation_goal":9.4444,
    "action_goal":6.7568,
    "total_goal":8.5809,
    "execution_success_rate":12.5,
    "parsing_error":30.4918,
    "hallucination_error":42.2951,
    "predicate_argument_number_error":2.623,
    "wrong_order_error":0.6557,
    "missing_step_error":13.1148,
    "affordance_error":3.6066,
    "additional_step_error":0.6557
  },
  {
    "Model":"Qwen\/Qwen3-14B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":14.8,
    "task_success_rate":0.3279,
    "state_goal":1.0791,
    "relation_goal":0.0,
    "action_goal":0.6757,
    "total_goal":0.6601,
    "execution_success_rate":0.3,
    "parsing_error":99.0164,
    "hallucination_error":0.0,
    "predicate_argument_number_error":0.0,
    "wrong_order_error":0.0,
    "missing_step_error":0.6557,
    "affordance_error":0.0,
    "additional_step_error":0.0
  },
  {
    "Model":"Qwen\/Qwen1.5-72B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":72.3,
    "task_success_rate":3.9344,
    "state_goal":14.7482,
    "relation_goal":3.8889,
    "action_goal":1.3514,
    "total_goal":8.2508,
    "execution_success_rate":7.9,
    "parsing_error":18.3607,
    "hallucination_error":47.541,
    "predicate_argument_number_error":5.9016,
    "wrong_order_error":0.3279,
    "missing_step_error":20.9836,
    "affordance_error":4.2623,
    "additional_step_error":0.6557
  },
  {
    "Model":"openai\/gpt-oss-120b",
    "Model Family":"GPT-OSS",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":120.4,
    "task_success_rate":75.7377,
    "state_goal":87.4101,
    "relation_goal":75.5556,
    "action_goal":64.1892,
    "total_goal":78.2178,
    "execution_success_rate":75.1,
    "parsing_error":3.2787,
    "hallucination_error":3.9344,
    "predicate_argument_number_error":0.6557,
    "wrong_order_error":0.9836,
    "missing_step_error":17.7049,
    "affordance_error":0.0,
    "additional_step_error":4.2623
  },
  {
    "Model":"Qwen\/Qwen1.5-4B",
    "Model Family":"Qwen1.5",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":4.0,
    "task_success_rate":0.0,
    "state_goal":1.4388,
    "relation_goal":0.0,
    "action_goal":0.0,
    "total_goal":0.6601,
    "execution_success_rate":1.6,
    "parsing_error":60.9836,
    "hallucination_error":45.9016,
    "predicate_argument_number_error":0.3279,
    "wrong_order_error":0.0,
    "missing_step_error":3.9344,
    "affordance_error":2.2951,
    "additional_step_error":0.0
  },
  {
    "Model":"meta-llama\/Llama-3.1-70B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":70.6,
    "task_success_rate":67.541,
    "state_goal":71.5827,
    "relation_goal":76.6667,
    "action_goal":51.3514,
    "total_goal":68.1518,
    "execution_success_rate":73.1,
    "parsing_error":0.0,
    "hallucination_error":7.2131,
    "predicate_argument_number_error":2.623,
    "wrong_order_error":1.6393,
    "missing_step_error":14.7541,
    "affordance_error":0.6557,
    "additional_step_error":2.623
  },
  {
    "Model":"Qwen\/Qwen-72B",
    "Model Family":"Qwen",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":72.3,
    "task_success_rate":9.5082,
    "state_goal":14.0288,
    "relation_goal":10.5556,
    "action_goal":3.3784,
    "total_goal":10.396,
    "execution_success_rate":14.8,
    "parsing_error":4.5902,
    "hallucination_error":36.0656,
    "predicate_argument_number_error":8.1967,
    "wrong_order_error":1.9672,
    "missing_step_error":31.1475,
    "affordance_error":5.5738,
    "additional_step_error":2.623
  },
  {
    "Model":"Qwen\/Qwen3-32B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":32.8,
    "task_success_rate":46.2295,
    "state_goal":39.9281,
    "relation_goal":56.1111,
    "action_goal":41.2162,
    "total_goal":45.0495,
    "execution_success_rate":47.9,
    "parsing_error":0.3279,
    "hallucination_error":15.4098,
    "predicate_argument_number_error":1.3115,
    "wrong_order_error":0.6557,
    "missing_step_error":31.4754,
    "affordance_error":2.9508,
    "additional_step_error":1.6393
  },
  {
    "Model":"Qwen\/Qwen3-8B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":8.2,
    "task_success_rate":0.6557,
    "state_goal":23.3813,
    "relation_goal":0.0,
    "action_goal":0.0,
    "total_goal":10.7261,
    "execution_success_rate":0.0,
    "parsing_error":5.9016,
    "hallucination_error":20.6557,
    "predicate_argument_number_error":5.9016,
    "wrong_order_error":0.0,
    "missing_step_error":65.2459,
    "affordance_error":2.2951,
    "additional_step_error":0.0
  },
  {
    "Model":"meta-llama\/Llama-4-Scout-17B-16E-Instruct",
    "Model Family":"Llama",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":108.6,
    "task_success_rate":62.9508,
    "state_goal":75.5396,
    "relation_goal":66.1111,
    "action_goal":52.7027,
    "total_goal":67.1617,
    "execution_success_rate":63.3,
    "parsing_error":0.0,
    "hallucination_error":8.5246,
    "predicate_argument_number_error":6.5574,
    "wrong_order_error":3.6066,
    "missing_step_error":16.0656,
    "affordance_error":1.9672,
    "additional_step_error":3.6066
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1",
    "Model Family":"DeepSeek-R1",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":684.5,
    "task_success_rate":57.0492,
    "state_goal":38.4892,
    "relation_goal":69.4444,
    "action_goal":51.3514,
    "total_goal":50.8251,
    "execution_success_rate":63.6,
    "parsing_error":0.3279,
    "hallucination_error":21.3115,
    "predicate_argument_number_error":3.2787,
    "wrong_order_error":0.9836,
    "missing_step_error":9.8361,
    "affordance_error":0.6557,
    "additional_step_error":0.6557
  },
  {
    "Model":"meta-llama\/Llama-3.2-1B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":1.2,
    "task_success_rate":0.0,
    "state_goal":2.1583,
    "relation_goal":0.0,
    "action_goal":0.0,
    "total_goal":1.0526,
    "execution_success_rate":0.0,
    "parsing_error":28.8809,
    "hallucination_error":40.0722,
    "predicate_argument_number_error":12.2744,
    "wrong_order_error":0.0,
    "missing_step_error":25.6318,
    "affordance_error":0.361,
    "additional_step_error":0.0
  },
  {
    "Model":"deepseek-ai\/DeepSeek-V3",
    "Model Family":"DeepSeek",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":684.5,
    "task_success_rate":78.0328,
    "state_goal":80.2158,
    "relation_goal":82.7778,
    "action_goal":70.2703,
    "total_goal":78.5479,
    "execution_success_rate":84.3,
    "parsing_error":0.0,
    "hallucination_error":5.5738,
    "predicate_argument_number_error":0.6557,
    "wrong_order_error":0.0,
    "missing_step_error":9.1803,
    "affordance_error":0.3279,
    "additional_step_error":0.6557
  },
  {
    "Model":"01-ai\/Yi-1.5-34B-Chat",
    "Model Family":"Yi",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":34.4,
    "task_success_rate":29.932,
    "state_goal":27.7978,
    "relation_goal":45.8824,
    "action_goal":32.4324,
    "total_goal":34.1176,
    "execution_success_rate":35.7,
    "parsing_error":0.0,
    "hallucination_error":20.4082,
    "predicate_argument_number_error":7.483,
    "wrong_order_error":1.0204,
    "missing_step_error":30.9524,
    "affordance_error":4.4218,
    "additional_step_error":2.0408
  },
  {
    "Model":"meta-llama\/Meta-Llama-3-70B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":70.6,
    "task_success_rate":57.0492,
    "state_goal":53.5971,
    "relation_goal":68.3333,
    "action_goal":43.9189,
    "total_goal":55.6106,
    "execution_success_rate":62.6,
    "parsing_error":0.3279,
    "hallucination_error":20.0,
    "predicate_argument_number_error":9.1803,
    "wrong_order_error":0.6557,
    "missing_step_error":6.5574,
    "affordance_error":0.6557,
    "additional_step_error":3.9344
  },
  {
    "Model":"LGAI-EXAONE\/EXAONE-3.5-32B-Instruct",
    "Model Family":"Exaone",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":32.0,
    "task_success_rate":48.5246,
    "state_goal":50.7194,
    "relation_goal":52.7778,
    "action_goal":50.0,
    "total_goal":51.1551,
    "execution_success_rate":52.1,
    "parsing_error":10.8197,
    "hallucination_error":18.6885,
    "predicate_argument_number_error":0.6557,
    "wrong_order_error":2.2951,
    "missing_step_error":14.0984,
    "affordance_error":1.3115,
    "additional_step_error":1.6393
  },
  {
    "Model":"openai\/gpt-oss-20b",
    "Model Family":"GPT-OSS",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":21.5,
    "task_success_rate":71.8033,
    "state_goal":83.8129,
    "relation_goal":70.5556,
    "action_goal":54.0541,
    "total_goal":72.6073,
    "execution_success_rate":74.8,
    "parsing_error":11.8033,
    "hallucination_error":3.2787,
    "predicate_argument_number_error":0.0,
    "wrong_order_error":0.0,
    "missing_step_error":10.4918,
    "affordance_error":0.0,
    "additional_step_error":1.3115
  },
  {
    "Model":"meta-llama\/Llama-4-Maverick-17B-128E-Instruct-FP8",
    "Model Family":"Llama",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":401.6,
    "task_success_rate":76.0656,
    "state_goal":88.8489,
    "relation_goal":85.0,
    "action_goal":64.1892,
    "total_goal":81.6832,
    "execution_success_rate":80.0,
    "parsing_error":0.0,
    "hallucination_error":5.5738,
    "predicate_argument_number_error":0.3279,
    "wrong_order_error":0.9836,
    "missing_step_error":11.4754,
    "affordance_error":1.6393,
    "additional_step_error":4.5902
  },
  {
    "Model":"meta-llama\/Meta-Llama-3-8B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":8.0,
    "task_success_rate":28.1967,
    "state_goal":34.1727,
    "relation_goal":25.5556,
    "action_goal":13.5135,
    "total_goal":26.5677,
    "execution_success_rate":32.8,
    "parsing_error":0.0,
    "hallucination_error":38.0328,
    "predicate_argument_number_error":4.5902,
    "wrong_order_error":0.0,
    "missing_step_error":24.2623,
    "affordance_error":0.3279,
    "additional_step_error":1.9672
  },
  {
    "Model":"Qwen\/Qwen3-1.7B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":2.0,
    "task_success_rate":0.0,
    "state_goal":8.2734,
    "relation_goal":0.0,
    "action_goal":0.0,
    "total_goal":3.7954,
    "execution_success_rate":0.0,
    "parsing_error":26.2295,
    "hallucination_error":48.8525,
    "predicate_argument_number_error":3.2787,
    "wrong_order_error":0.0,
    "missing_step_error":16.7213,
    "affordance_error":4.918,
    "additional_step_error":0.0
  },
  {
    "Model":"meta-llama\/Llama-3.1-8B-Instruct",
    "Model Family":"Llama-3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":8.0,
    "task_success_rate":36.7213,
    "state_goal":53.2374,
    "relation_goal":31.6667,
    "action_goal":28.3784,
    "total_goal":40.7591,
    "execution_success_rate":36.7,
    "parsing_error":0.0,
    "hallucination_error":20.3279,
    "predicate_argument_number_error":8.1967,
    "wrong_order_error":5.2459,
    "missing_step_error":20.9836,
    "affordance_error":8.5246,
    "additional_step_error":6.5574
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1-Distill-Qwen-14B",
    "Model Family":"DeepSeek-R1",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":14.8,
    "task_success_rate":23.2558,
    "state_goal":46.5455,
    "relation_goal":30.6818,
    "action_goal":27.027,
    "total_goal":37.0618,
    "execution_success_rate":28.6,
    "parsing_error":2.3256,
    "hallucination_error":10.6312,
    "predicate_argument_number_error":2.6578,
    "wrong_order_error":0.3322,
    "missing_step_error":55.1495,
    "affordance_error":0.3322,
    "additional_step_error":1.9934
  },
  {
    "Model":"mistralai\/Mistral-7B-Instruct-v0.2",
    "Model Family":"Mistral",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":7.2,
    "task_success_rate":25.9016,
    "state_goal":26.6187,
    "relation_goal":30.0,
    "action_goal":32.4324,
    "total_goal":29.0429,
    "execution_success_rate":29.8,
    "parsing_error":3.2787,
    "hallucination_error":20.3279,
    "predicate_argument_number_error":3.9344,
    "wrong_order_error":0.3279,
    "missing_step_error":36.7213,
    "affordance_error":5.5738,
    "additional_step_error":0.3279
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1-Distill-Qwen-32B",
    "Model Family":"DeepSeek-R1",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":32.8,
    "task_success_rate":49.3333,
    "state_goal":72.4265,
    "relation_goal":46.0227,
    "action_goal":32.8671,
    "total_goal":54.9915,
    "execution_success_rate":55.0,
    "parsing_error":0.3333,
    "hallucination_error":14.6667,
    "predicate_argument_number_error":0.6667,
    "wrong_order_error":0.3333,
    "missing_step_error":25.3333,
    "affordance_error":3.6667,
    "additional_step_error":3.0
  },
  {
    "Model":"moonshotai\/Kimi-K2-Instruct",
    "Model Family":"Kimi",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":1000.0,
    "task_success_rate":76.7213,
    "state_goal":88.8489,
    "relation_goal":80.5556,
    "action_goal":62.8378,
    "total_goal":80.033,
    "execution_success_rate":82.3,
    "parsing_error":2.2951,
    "hallucination_error":1.9672,
    "predicate_argument_number_error":0.3279,
    "wrong_order_error":0.6557,
    "missing_step_error":11.8033,
    "affordance_error":0.6557,
    "additional_step_error":0.9836
  },
  {
    "Model":"deepseek-ai\/DeepSeek-R1-Distill-Qwen-7B",
    "Model Family":"DeepSeek-R1",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":7.6,
    "task_success_rate":0.0,
    "state_goal":0.0,
    "relation_goal":0.0,
    "action_goal":9.0909,
    "total_goal":1.2346,
    "execution_success_rate":0.0,
    "parsing_error":23.6842,
    "hallucination_error":39.4737,
    "predicate_argument_number_error":18.4211,
    "wrong_order_error":0.0,
    "missing_step_error":2.6316,
    "affordance_error":15.7895,
    "additional_step_error":0.0
  },
  {
    "Model":"LGAI-EXAONE\/EXAONE-Deep-32B",
    "Model Family":"Exaone",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":32.0,
    "task_success_rate":1.6393,
    "state_goal":28.4173,
    "relation_goal":1.1111,
    "action_goal":0.0,
    "total_goal":13.3663,
    "execution_success_rate":0.7,
    "parsing_error":5.9016,
    "hallucination_error":6.2295,
    "predicate_argument_number_error":2.9508,
    "wrong_order_error":0.0,
    "missing_step_error":75.082,
    "affordance_error":9.1803,
    "additional_step_error":0.0
  },
  {
    "Model":"Qwen\/Qwen-7B",
    "Model Family":"Qwen",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":7.7,
    "task_success_rate":0.3279,
    "state_goal":0.0,
    "relation_goal":0.5556,
    "action_goal":1.3514,
    "total_goal":0.495,
    "execution_success_rate":0.3,
    "parsing_error":85.2459,
    "hallucination_error":20.3279,
    "predicate_argument_number_error":2.623,
    "wrong_order_error":0.3279,
    "missing_step_error":3.2787,
    "affordance_error":0.6557,
    "additional_step_error":0.0
  },
  {
    "Model":"Qwen\/Qwen3-4B",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":4.0,
    "task_success_rate":41.9672,
    "state_goal":53.5971,
    "relation_goal":43.8889,
    "action_goal":45.9459,
    "total_goal":48.8449,
    "execution_success_rate":48.9,
    "parsing_error":0.0,
    "hallucination_error":2.2951,
    "predicate_argument_number_error":1.6393,
    "wrong_order_error":0.0,
    "missing_step_error":34.4262,
    "affordance_error":12.7869,
    "additional_step_error":1.6393
  },
  {
    "Model":"Qwen\/Qwen3-235B-A22B-Thinking-2507",
    "Model Family":"Qwen3",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":235.1,
    "task_success_rate":4.5902,
    "state_goal":7.554,
    "relation_goal":3.8889,
    "action_goal":4.0541,
    "total_goal":5.6106,
    "execution_success_rate":4.3,
    "parsing_error":86.5574,
    "hallucination_error":7.2131,
    "predicate_argument_number_error":1.3115,
    "wrong_order_error":0.0,
    "missing_step_error":9.1803,
    "affordance_error":0.0,
    "additional_step_error":0.0
  },
  {
    "Model":"01-ai\/Yi-Coder-9B-Chat",
    "Model Family":"Yi",
    "dataset":"virtualhome",
    "eval_type":"action_sequencing_v4",
    "Model Size (B)":8.8,
    "task_success_rate":36.3934,
    "state_goal":51.4388,
    "relation_goal":34.4444,
    "action_goal":22.973,
    "total_goal":39.4389,
    "execution_success_rate":38.7,
    "parsing_error":0.3279,
    "hallucination_error":19.0164,
    "predicate_argument_number_error":7.8689,
    "wrong_order_error":1.9672,
    "missing_step_error":29.1803,
    "affordance_error":2.9508,
    "additional_step_error":2.9508
  }
]