[
  {
    "instruction": "Go forward to the right, around the dining table and chairs, to the piano.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.54.068.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.54.555.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.062.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.566.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.56.110.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.569.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.58.557.png"
    }
  },
  {
    "instruction": "Go forward to the right, around the dining table and chairs, to the piano.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.54.555.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.062.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.569.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.56.563.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.58.066.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.59.068.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.58.557.png"
    }
  },
  {
    "instruction": "Go forward to the right, around the dining table and chairs, to the piano.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.062.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.569.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.56.110.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.566.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.58.557.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.057.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.59.068.png"
    }
  },
  {
    "instruction": "Go forward to the right, around the dining table and chairs, to the piano.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.569.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.56.110.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.56.563.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.58.066.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.59.068.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.54.555.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.566.png"
    }
  },
  {
    "instruction": "Go forward to the right, around the dining table and chairs, to the piano.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.56.110.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.56.563.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.057.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.569.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.58.066.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.54.555.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.566.png"
    }
  },
  {
    "instruction": "Go forward to the right, around the dining table and chairs, to the piano.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.56.563.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.057.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.566.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.58.557.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.58.066.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.569.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.54.555.png"
    }
  },
  {
    "instruction": "Go forward to the right, around the dining table and chairs, to the piano.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.057.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.57.566.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.58.066.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.56.563.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.59.068.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.54.068.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.54.55.569.png"
    }
  },
  {
    "instruction": "From the cabinet by the wall, go forward right to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.02.573.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.104.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.622.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.04.563.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.106.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.568.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.569.png"
    }
  },
  {
    "instruction": "From the cabinet by the wall, go forward right to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.104.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.622.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.04.069.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.100.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.07.601.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.568.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.02.573.png"
    }
  },
  {
    "instruction": "From the cabinet by the wall, go forward right to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.622.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.04.069.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.04.563.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.569.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.07.601.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.568.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.100.png"
    }
  },
  {
    "instruction": "From the cabinet by the wall, go forward right to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.04.069.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.04.563.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.100.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.108.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.02.573.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.104.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.106.png"
    }
  },
  {
    "instruction": "From the cabinet by the wall, go forward right to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.04.563.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.100.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.569.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.104.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.568.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.106.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.07.070.png"
    }
  },
  {
    "instruction": "From the cabinet by the wall, go forward right to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.100.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.569.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.108.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.568.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.622.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.07.070.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.104.png"
    }
  },
  {
    "instruction": "From the cabinet by the wall, go forward right to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.05.569.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.108.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.568.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.104.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.07.601.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.07.070.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.106.png"
    }
  },
  {
    "instruction": "From the cabinet by the wall, go forward right to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.108.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.06.568.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.07.070.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.106.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.104.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.02.573.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.03.622.png"
    }
  },
  {
    "instruction": "From the sofa, turn right to the dining table and chairs.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.106.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.569.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.09.108.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.11.613.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.10.099.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.09.602.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.10.618.png"
    }
  },
  {
    "instruction": "From the sofa, turn right to the dining table and chairs.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.569.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.09.108.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.09.602.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.11.613.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.10.618.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.10.099.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.11.115.png"
    }
  },
  {
    "instruction": "From the sofa, turn right to the dining table and chairs.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.09.108.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.09.602.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.10.099.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.106.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.11.613.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.569.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.11.115.png"
    }
  },
  {
    "instruction": "From the sofa, turn right to the dining table and chairs.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.09.602.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.10.099.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.10.618.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.106.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.11.613.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.08.569.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.11.115.png"
    }
  },
  {
    "instruction": "From the dining table and chairs, go to the left sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.13.110.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.13.602.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.14.108.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.106.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.611.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.14.613.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.15.118.png"
    }
  },
  {
    "instruction": "From the dining table and chairs, go to the left sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.13.602.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.14.108.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.14.613.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.106.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.15.601.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.611.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.13.110.png"
    }
  },
  {
    "instruction": "From the dining table and chairs, go to the left sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.14.108.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.14.613.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.15.118.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.13.110.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.106.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.611.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.15.601.png"
    }
  },
  {
    "instruction": "From the dining table and chairs, go to the left sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.14.613.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.15.118.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.15.601.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.13.110.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.14.108.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.13.602.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.611.png"
    }
  },
  {
    "instruction": "Start from the sofa, turn right, see the sofa, the plant, and the window, then walk forward, pass the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.611.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.17.117.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.17.610.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.19.105.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.18.600.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.18.116.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.20.115.png"
    }
  },
  {
    "instruction": "Start from the sofa, turn right, see the sofa, the plant, and the window, then walk forward, pass the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.17.117.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.17.610.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.18.116.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.611.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.20.617.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.19.105.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.19.613.png"
    }
  },
  {
    "instruction": "Start from the sofa, turn right, see the sofa, the plant, and the window, then walk forward, pass the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.17.610.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.18.116.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.18.600.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.20.115.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.19.105.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.19.613.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.611.png"
    }
  },
  {
    "instruction": "Start from the sofa, turn right, see the sofa, the plant, and the window, then walk forward, pass the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.18.116.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.18.600.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.19.105.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.17.117.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.17.610.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.20.617.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.20.115.png"
    }
  },
  {
    "instruction": "Start from the sofa, turn right, see the sofa, the plant, and the window, then walk forward, pass the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.18.600.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.19.105.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.19.613.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.20.617.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.20.115.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.17.610.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.16.611.png"
    }
  },
  {
    "instruction": "Pass the dining table and chairs, walk forward, pass the piano and the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.32.103.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.32.600.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.33.112.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.34.607.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.34.116.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.35.619.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.35.113.png"
    }
  },
  {
    "instruction": "Pass the dining table and chairs, walk forward, pass the piano and the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.32.600.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.33.112.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.33.600.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.34.607.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.32.103.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.35.619.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.34.116.png"
    }
  },
  {
    "instruction": "Pass the dining table and chairs, walk forward, pass the piano and the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.33.112.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.33.600.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.34.116.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.32.103.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.36.100.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.35.113.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.35.619.png"
    }
  },
  {
    "instruction": "Pass the dining table and chairs, walk forward, pass the piano and the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.33.600.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.34.116.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.34.607.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.33.112.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.35.619.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.36.100.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.32.103.png"
    }
  },
  {
    "instruction": "Pass the dining table and chairs, walk forward, pass the piano and the plant, and go to the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.34.116.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.34.607.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.35.113.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.32.600.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.33.112.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.36.100.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.32.103.png"
    }
  },
  {
    "instruction": "Start from beside the sofa and the plant, slightly turn right, see the dining table and chairs, and walk to them.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.37.114.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.37.614.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.38.111.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.600.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.104.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.40.117.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.38.609.png"
    }
  },
  {
    "instruction": "Start from beside the sofa and the plant, slightly turn right, see the dining table and chairs, and walk to them.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.37.614.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.38.111.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.38.609.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.104.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.600.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.40.117.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.37.114.png"
    }
  },
  {
    "instruction": "Start from beside the sofa and the plant, slightly turn right, see the dining table and chairs, and walk to them.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.38.111.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.38.609.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.104.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.37.614.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.600.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.40.117.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.37.114.png"
    }
  },
  {
    "instruction": "Start from beside the sofa and the plant, slightly turn right, see the dining table and chairs, and walk to them.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.38.609.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.104.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.600.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.40.614.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.37.114.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.37.614.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.40.117.png"
    }
  },
  {
    "instruction": "Start from beside the sofa and the plant, slightly turn right, see the dining table and chairs, and walk to them.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.104.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.39.600.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.40.117.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.41.109.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.38.609.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.40.614.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.37.114.png"
    }
  },
  {
    "instruction": "Walk forward to the piano, go around the piano, and walk to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.45.670.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.46.116.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.46.609.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.49.651.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.49.122.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.47.608.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.48.671.png"
    }
  },
  {
    "instruction": "Walk forward to the piano, go around the piano, and walk to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.46.116.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.46.609.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.47.109.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.48.671.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.49.651.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.47.608.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.48.106.png"
    }
  },
  {
    "instruction": "Walk forward to the piano, go around the piano, and walk to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.46.609.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.47.109.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.47.608.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.48.106.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.45.670.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.48.671.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.49.122.png"
    }
  },
  {
    "instruction": "Walk forward to the piano, go around the piano, and walk to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.47.109.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.47.608.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.48.106.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.49.122.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.48.671.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.46.609.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.46.116.png"
    }
  },
  {
    "instruction": "Walk forward to the piano, go around the piano, and walk to the sofa.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.47.608.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.48.106.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.48.671.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.46.609.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.46.116.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.47.109.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_02.55.49.651.png"
    }
  },
  {
    "instruction": "From your current position, walk forward and go to the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.51.060.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.51.611.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.52.064.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.56.107.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.55.565.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.612.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.113.png"
    }
  },
  {
    "instruction": "From your current position, walk forward and go to the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.51.611.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.52.064.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.52.557.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.567.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.55.111.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.55.565.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.612.png"
    }
  },
  {
    "instruction": "From your current position, walk forward and go to the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.52.064.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.52.557.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.113.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.061.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.55.565.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.51.060.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.567.png"
    }
  },
  {
    "instruction": "From your current position, walk forward and go to the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.52.557.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.113.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.567.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.55.565.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.061.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.612.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.51.611.png"
    }
  },
  {
    "instruction": "From your current position, walk forward and go to the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.113.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.567.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.061.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.56.107.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.612.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.55.111.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.55.565.png"
    }
  },
  {
    "instruction": "From your current position, walk forward and go to the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.567.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.061.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.612.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.52.557.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.55.565.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.52.064.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.56.107.png"
    }
  },
  {
    "instruction": "From your current position, walk forward and go to the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.061.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.54.612.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.55.111.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.56.107.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.51.611.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.53.113.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.51.060.png"
    }
  },
  {
    "instruction": "Start from the TV, turn right, see two tables and chairs, and walk to the table and chairs on the left.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.57.106.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.57.610.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.119.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.602.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.01.616.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.115.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.105.png"
    }
  },
  {
    "instruction": "Start from the TV, turn right, see two tables and chairs, and walk to the table and chairs on the left.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.57.610.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.119.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.602.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.601.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.105.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.115.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.604.png"
    }
  },
  {
    "instruction": "Start from the TV, turn right, see two tables and chairs, and walk to the table and chairs on the left.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.119.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.602.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.105.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.01.112.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.02.099.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.01.616.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.115.png"
    }
  },
  {
    "instruction": "Start from the TV, turn right, see two tables and chairs, and walk to the table and chairs on the left.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.602.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.105.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.601.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.57.106.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.604.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.01.112.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.57.610.png"
    }
  },
  {
    "instruction": "Start from the TV, turn right, see two tables and chairs, and walk to the table and chairs on the left.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.105.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.601.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.115.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.01.112.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.57.106.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.602.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.02.099.png"
    }
  },
  {
    "instruction": "Start from the TV, turn right, see two tables and chairs, and walk to the table and chairs on the left.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.601.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.115.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.604.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.105.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.57.610.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.02.099.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.01.616.png"
    }
  },
  {
    "instruction": "Start from the TV, turn right, see two tables and chairs, and walk to the table and chairs on the left.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.115.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.00.604.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.01.112.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.02.099.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.602.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.58.119.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.10.59.601.png"
    }
  },
  {
    "instruction": "From your current position, pass by the trash can and walk to the cabinet under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.05.613.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.06.114.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.06.603.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.10.611.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.108.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.601.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.607.png"
    }
  },
  {
    "instruction": "From your current position, pass by the trash can and walk to the cabinet under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.06.114.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.06.603.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.108.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.11.115.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.607.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.10.107.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.115.png"
    }
  },
  {
    "instruction": "From your current position, pass by the trash can and walk to the cabinet under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.06.603.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.108.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.601.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.607.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.05.613.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.614.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.06.114.png"
    }
  },
  {
    "instruction": "From your current position, pass by the trash can and walk to the cabinet under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.108.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.601.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.115.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.614.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.11.115.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.607.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.101.png"
    }
  },
  {
    "instruction": "From your current position, pass by the trash can and walk to the cabinet under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.601.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.115.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.607.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.10.611.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.10.107.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.614.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.06.603.png"
    }
  },
  {
    "instruction": "From your current position, pass by the trash can and walk to the cabinet under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.115.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.607.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.101.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.614.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.10.107.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.601.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.108.png"
    }
  },
  {
    "instruction": "From your current position, pass by the trash can and walk to the cabinet under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.607.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.101.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.614.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.601.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.10.611.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.06.603.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.07.108.png"
    }
  },
  {
    "instruction": "From your current position, pass by the trash can and walk to the cabinet under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.101.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.09.614.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.10.107.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.11.115.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.10.611.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.06.603.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.08.115.png"
    }
  },
  {
    "instruction": "From your current position, see a white chair, turn right, and walk to the farthest chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.12.101.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.12.601.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.13.108.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.608.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.13.611.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.16.117.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.110.png"
    }
  },
  {
    "instruction": "From your current position, see a white chair, turn right, and walk to the farthest chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.12.601.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.13.108.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.13.611.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.608.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.110.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.15.602.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.15.105.png"
    }
  },
  {
    "instruction": "From your current position, see a white chair, turn right, and walk to the farthest chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.13.108.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.13.611.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.110.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.15.602.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.608.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.15.105.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.12.601.png"
    }
  },
  {
    "instruction": "From your current position, see a white chair, turn right, and walk to the farthest chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.13.611.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.110.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.608.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.15.602.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.12.101.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.15.105.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.16.117.png"
    }
  },
  {
    "instruction": "From your current position, see a white chair, turn right, and walk to the farthest chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.110.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.14.608.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.15.105.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.13.611.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.15.602.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.16.117.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.12.601.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.17.602.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.103.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.600.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.109.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.613.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.25.605.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.117.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.103.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.600.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.117.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.604.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.108.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.116.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.606.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.600.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.117.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.613.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.604.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.112.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.606.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.116.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.117.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.613.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.108.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.109.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.116.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.600.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.117.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.613.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.108.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.604.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.610.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.600.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.25.103.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.103.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.108.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.604.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.116.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.606.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.604.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.112.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.103.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.604.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.116.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.610.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.614.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.25.605.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.25.103.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.613.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.116.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.610.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.112.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.606.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.109.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.613.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.25.605.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.610.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.112.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.614.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.604.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.606.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.117.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.613.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.112.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.614.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.109.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.604.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.117.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.19.613.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.21.610.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.614.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.109.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.606.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.600.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.112.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.604.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.17.602.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.109.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.606.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.117.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.25.103.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.103.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.112.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.604.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the right and walk to the area under the TV.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.23.606.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.117.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.24.604.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.18.600.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.22.614.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.20.604.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.25.605.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the left and walk to the trash can.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.26.611.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.27.110.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.27.614.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.30.109.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.28.611.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.29.113.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.28.113.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the left and walk to the trash can.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.27.110.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.27.614.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.28.113.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.28.611.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.29.113.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.30.109.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.29.600.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the left and walk to the trash can.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.27.614.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.28.113.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.28.611.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.27.110.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.30.109.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.26.611.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.29.600.png"
    }
  },
  {
    "instruction": "From your current position, move forward to the left and walk to the trash can.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.28.113.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.28.611.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.29.113.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.27.614.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.26.611.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.30.109.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.11.29.600.png"
    }
  },
  {
    "instruction": "From your current position, enter the room, pass by the desk and chair, and walk to the bed inside.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.09.435.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.09.949.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.10.445.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.10.937.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.945.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.12.945.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.437.png"
    }
  },
  {
    "instruction": "From your current position, enter the room, pass by the desk and chair, and walk to the bed inside.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.09.949.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.10.445.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.10.937.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.945.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.13.946.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.09.435.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.12.945.png"
    }
  },
  {
    "instruction": "From your current position, enter the room, pass by the desk and chair, and walk to the bed inside.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.10.445.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.10.937.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.437.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.09.435.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.12.440.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.945.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.12.945.png"
    }
  },
  {
    "instruction": "From your current position, enter the room, pass by the desk and chair, and walk to the bed inside.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.10.937.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.437.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.945.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.09.949.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.13.439.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.13.946.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.12.945.png"
    }
  },
  {
    "instruction": "From your current position, enter the room, pass by the desk and chair, and walk to the bed inside.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.437.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.945.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.12.440.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.13.439.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.09.435.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.10.937.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.09.949.png"
    }
  },
  {
    "instruction": "From your current position, enter the room, pass by the desk and chair, and walk to the bed inside.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.945.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.12.440.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.12.945.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.11.437.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.10.445.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.13.946.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.13.439.png"
    }
  },
  {
    "instruction": "Turn right from the bed and walk to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.14.449.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.14.945.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.15.438.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.17.935.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.18.445.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.16.437.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.17.442.png"
    }
  },
  {
    "instruction": "Turn right from the bed and walk to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.14.945.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.15.438.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.15.936.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.17.442.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.16.935.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.18.445.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.14.449.png"
    }
  },
  {
    "instruction": "Turn right from the bed and walk to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.15.438.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.15.936.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.16.437.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.18.445.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.14.945.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.17.935.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.17.442.png"
    }
  },
  {
    "instruction": "Turn right from the bed and walk to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.15.936.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.16.437.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.16.935.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.15.438.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.14.945.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.17.935.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.17.442.png"
    }
  },
  {
    "instruction": "Turn right from the bed and walk to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.16.437.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.16.935.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.17.442.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.18.445.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.14.945.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.14.449.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.17.935.png"
    }
  },
  {
    "instruction": "From your current position, enter the bedroom, walk to the bed on the right, then go between the two beds, and walk toward the nightstand.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.444.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.949.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.448.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.940.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.445.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.31.446.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.943.png"
    }
  },
  {
    "instruction": "From your current position, enter the bedroom, walk to the bed on the right, then go between the two beds, and walk toward the nightstand.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.949.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.448.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.940.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.445.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.31.446.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.438.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.941.png"
    }
  },
  {
    "instruction": "From your current position, enter the bedroom, walk to the bed on the right, then go between the two beds, and walk toward the nightstand.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.448.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.940.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.445.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.949.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.941.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.444.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.438.png"
    }
  },
  {
    "instruction": "From your current position, enter the bedroom, walk to the bed on the right, then go between the two beds, and walk toward the nightstand.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.940.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.445.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.941.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.31.446.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.943.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.444.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.438.png"
    }
  },
  {
    "instruction": "From your current position, enter the bedroom, walk to the bed on the right, then go between the two beds, and walk toward the nightstand.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.445.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.941.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.438.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.30.443.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.448.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.943.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.444.png"
    }
  },
  {
    "instruction": "From your current position, enter the bedroom, walk to the bed on the right, then go between the two beds, and walk toward the nightstand.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.28.941.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.438.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.943.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.444.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.940.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.30.943.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.30.443.png"
    }
  },
  {
    "instruction": "From your current position, enter the bedroom, walk to the bed on the right, then go between the two beds, and walk toward the nightstand.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.438.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.29.943.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.30.443.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.27.940.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.444.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.31.446.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/0/2025-09-10_03.20.26.949.png"
    }
  },
  {
    "instruction": "From your current position, walk forward to the right, see several desks and chairs, then walk forward to the left and go to the desk and chair by the wall. Sit on the chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.43.865.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.361.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.864.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.365.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.45.857.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.867.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.862.png"
    }
  },
  {
    "instruction": "From your current position, walk forward to the right, see several desks and chairs, then walk forward to the left and go to the desk and chair by the wall. Sit on the chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.361.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.864.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.45.367.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.358.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.48.364.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.45.857.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.365.png"
    }
  },
  {
    "instruction": "From your current position, walk forward to the right, see several desks and chairs, then walk forward to the left and go to the desk and chair by the wall. Sit on the chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.864.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.45.367.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.45.857.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.862.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.365.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.361.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.867.png"
    }
  },
  {
    "instruction": "From your current position, walk forward to the right, see several desks and chairs, then walk forward to the left and go to the desk and chair by the wall. Sit on the chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.45.367.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.45.857.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.365.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.862.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.358.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.48.364.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.867.png"
    }
  },
  {
    "instruction": "From your current position, walk forward to the right, see several desks and chairs, then walk forward to the left and go to the desk and chair by the wall. Sit on the chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.45.857.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.365.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.862.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.358.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.864.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.867.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.43.865.png"
    }
  },
  {
    "instruction": "From your current position, walk forward to the right, see several desks and chairs, then walk forward to the left and go to the desk and chair by the wall. Sit on the chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.365.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.862.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.358.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.48.364.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.45.857.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.864.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.48.858.png"
    }
  },
  {
    "instruction": "From your current position, walk forward to the right, see several desks and chairs, then walk forward to the left and go to the desk and chair by the wall. Sit on the chair.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.862.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.358.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.47.867.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.864.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.48.858.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.46.365.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.44.361.png"
    }
  },
  {
    "instruction": "Step down from the desk and chair, turn right, and walk to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.48.858.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.49.361.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.49.859.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.365.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.50.358.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.858.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.50.859.png"
    }
  },
  {
    "instruction": "Step down from the desk and chair, turn right, and walk to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.49.361.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.49.859.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.50.358.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.365.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.858.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.48.858.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.361.png"
    }
  },
  {
    "instruction": "Step down from the desk and chair, turn right, and walk to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.49.859.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.50.358.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.50.859.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.858.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.365.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.48.858.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.361.png"
    }
  },
  {
    "instruction": "Step down from the desk and chair, turn right, and walk to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.50.358.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.50.859.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.361.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.48.858.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.49.361.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.858.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.365.png"
    }
  },
  {
    "instruction": "Walk to the bookshelf, turn right, and go to the chair in the middle of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.361.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.858.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.365.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.360.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.363.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.867.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.54.366.png"
    }
  },
  {
    "instruction": "Walk to the bookshelf, turn right, and go to the chair in the middle of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.858.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.365.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.867.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.916.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.360.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.363.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.859.png"
    }
  },
  {
    "instruction": "Walk to the bookshelf, turn right, and go to the chair in the middle of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.365.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.867.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.360.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.54.366.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.916.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.363.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.859.png"
    }
  },
  {
    "instruction": "Walk to the bookshelf, turn right, and go to the chair in the middle of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.867.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.360.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.859.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.916.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.54.860.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.365.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.54.366.png"
    }
  },
  {
    "instruction": "Walk to the bookshelf, turn right, and go to the chair in the middle of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.360.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.859.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.54.366.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.858.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.363.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.916.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.365.png"
    }
  },
  {
    "instruction": "Walk to the bookshelf, turn right, and go to the chair in the middle of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.53.859.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.54.366.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.54.860.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.363.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.52.867.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.51.858.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.55.916.png"
    }
  },
  {
    "instruction": "From the chair, turn right to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.56.916.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.57.363.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.57.869.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.59.368.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.59.915.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.58.866.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.00.416.png"
    }
  },
  {
    "instruction": "From the chair, turn right to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.57.363.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.57.869.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.58.417.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.59.368.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.58.866.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.56.916.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.01.411.png"
    }
  },
  {
    "instruction": "From the chair, turn right to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.57.869.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.58.417.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.58.866.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.56.916.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.59.915.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.00.416.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.01.411.png"
    }
  },
  {
    "instruction": "From the chair, turn right to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.58.417.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.58.866.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.59.368.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.57.363.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.00.866.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.00.416.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.01.411.png"
    }
  },
  {
    "instruction": "From the chair, turn right to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.58.866.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.59.368.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.59.915.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.57.363.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.00.416.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.56.916.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.00.866.png"
    }
  },
  {
    "instruction": "From the chair, turn right to the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.59.368.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.59.915.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.00.416.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.01.411.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.00.866.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.56.916.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.27.57.363.png"
    }
  },
  {
    "instruction": "From your current position, walk to the desk and chair on the right.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.05.916.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.06.411.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.06.916.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.07.415.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.08.416.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.07.908.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.09.418.png"
    }
  },
  {
    "instruction": "From your current position, walk to the desk and chair on the right.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.06.411.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.06.916.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.07.415.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.08.912.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.07.908.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.09.418.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.08.416.png"
    }
  },
  {
    "instruction": "From your current position, walk to the desk and chair on the right.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.06.916.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.07.415.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.07.908.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.05.916.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.08.912.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.06.411.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.08.416.png"
    }
  },
  {
    "instruction": "From your current position, walk to the desk and chair on the right.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.07.415.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.07.908.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.08.416.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.05.916.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.09.418.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.06.411.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/3/2025-09-10_03.28.08.912.png"
    }
  },
  {
    "instruction": "From your current position, go around a set of table and chairs, and walk to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.21.178.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.21.685.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.175.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.23.183.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.24.679.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.24.186.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.680.png"
    }
  },
  {
    "instruction": "From your current position, go around a set of table and chairs, and walk to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.21.685.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.175.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.680.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.25.182.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.21.178.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.24.679.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.23.688.png"
    }
  },
  {
    "instruction": "From your current position, go around a set of table and chairs, and walk to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.175.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.680.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.23.183.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.21.685.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.23.688.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.24.186.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.24.679.png"
    }
  },
  {
    "instruction": "From your current position, go around a set of table and chairs, and walk to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.680.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.23.183.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.23.688.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.24.679.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.175.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.21.685.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.24.186.png"
    }
  },
  {
    "instruction": "From your current position, go around a set of table and chairs, and walk to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.23.183.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.23.688.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.24.186.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.175.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.24.679.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.22.680.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.25.182.png"
    }
  },
  {
    "instruction": "From your current position, go around the table and chairs ahead, and walk right to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.29.175.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.29.682.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.182.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.683.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.186.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.678.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.33.685.png"
    }
  },
  {
    "instruction": "From your current position, go around the table and chairs ahead, and walk right to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.29.682.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.182.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.678.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.683.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.29.175.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.34.180.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.33.178.png"
    }
  },
  {
    "instruction": "From your current position, go around the table and chairs ahead, and walk right to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.182.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.678.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.186.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.33.685.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.34.180.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.32.179.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.32.687.png"
    }
  },
  {
    "instruction": "From your current position, go around the table and chairs ahead, and walk right to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.678.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.186.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.683.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.182.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.32.687.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.34.180.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.29.682.png"
    }
  },
  {
    "instruction": "From your current position, go around the table and chairs ahead, and walk right to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.186.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.683.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.32.179.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.29.682.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.33.178.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.678.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.29.175.png"
    }
  },
  {
    "instruction": "From your current position, go around the table and chairs ahead, and walk right to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.683.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.32.179.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.32.687.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.186.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.29.175.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.34.180.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.33.685.png"
    }
  },
  {
    "instruction": "From your current position, go around the table and chairs ahead, and walk right to the area beside the sofa and coffee table.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.32.179.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.32.687.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.33.178.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.29.682.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.31.683.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.30.678.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/4/2025-09-10_03.36.34.180.png"
    }
  },
  {
    "instruction": "From your current position, go around the chair and walk to the cabinet in the corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.50.976.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.51.475.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.51.967.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.53.964.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.52.974.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.53.462.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.474.png"
    }
  },
  {
    "instruction": "From your current position, go around the chair and walk to the cabinet in the corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.51.475.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.51.967.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.52.461.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.53.964.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.474.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.50.976.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.53.462.png"
    }
  },
  {
    "instruction": "From your current position, go around the chair and walk to the cabinet in the corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.51.967.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.52.461.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.52.974.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.53.964.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.53.462.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.51.475.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.474.png"
    }
  },
  {
    "instruction": "From your current position, go around the chair and walk to the cabinet in the corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.52.461.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.52.974.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.53.462.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.474.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.51.475.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.50.976.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.51.967.png"
    }
  },
  {
    "instruction": "Start from the cabinet in front, turn right, walk toward the table and chairs on the other side of the room, and go to the table and chairs on the right.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.474.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.959.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.55.454.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.57.457.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.460.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.956.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.55.962.png"
    }
  },
  {
    "instruction": "Start from the cabinet in front, turn right, walk toward the table and chairs on the other side of the room, and go to the table and chairs on the right.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.959.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.55.454.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.55.962.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.956.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.460.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.58.458.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.57.457.png"
    }
  },
  {
    "instruction": "Start from the cabinet in front, turn right, walk toward the table and chairs on the other side of the room, and go to the table and chairs on the right.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.55.454.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.55.962.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.460.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.956.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.57.457.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.58.458.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.474.png"
    }
  },
  {
    "instruction": "Start from the cabinet in front, turn right, walk toward the table and chairs on the other side of the room, and go to the table and chairs on the right.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.55.962.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.460.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.956.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.959.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.57.956.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.57.457.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.55.454.png"
    }
  },
  {
    "instruction": "Start from the cabinet in front, turn right, walk toward the table and chairs on the other side of the room, and go to the table and chairs on the right.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.460.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.56.956.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.57.457.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.474.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.55.962.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.58.458.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.54.959.png"
    }
  },
  {
    "instruction": "Start from the table and chairs in front, go around them, and walk to the speaker in the corner.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.59.465.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.59.972.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.00.465.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.02.452.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.01.503.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.01.956.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.02.953.png"
    }
  },
  {
    "instruction": "Start from the table and chairs in front, go around them, and walk to the speaker in the corner.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.59.972.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.00.465.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.00.970.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.01.956.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.02.953.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.01.503.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.59.465.png"
    }
  },
  {
    "instruction": "Start from the table and chairs in front, go around them, and walk to the speaker in the corner.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.00.465.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.00.970.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.01.503.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.02.452.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.59.465.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.59.972.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.02.953.png"
    }
  },
  {
    "instruction": "Start from the table and chairs in front, go around them, and walk to the speaker in the corner.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.00.970.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.01.503.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.01.956.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.47.59.972.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.02.452.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.00.465.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.02.953.png"
    }
  },
  {
    "instruction": "Go around the table and chairs on the right, past the speaker and table and chairs, to the window",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.06.469.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.06.956.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.07.458.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.972.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.08.457.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.472.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.09.964.png"
    }
  },
  {
    "instruction": "Go around the table and chairs on the right, past the speaker and table and chairs, to the window",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.06.956.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.07.458.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.07.957.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.06.469.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.972.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.08.958.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.472.png"
    }
  },
  {
    "instruction": "Go around the table and chairs on the right, past the speaker and table and chairs, to the window",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.07.458.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.07.957.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.08.457.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.06.469.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.472.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.06.956.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.09.460.png"
    }
  },
  {
    "instruction": "Go around the table and chairs on the right, past the speaker and table and chairs, to the window",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.07.957.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.08.457.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.08.958.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.472.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.06.956.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.972.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.09.964.png"
    }
  },
  {
    "instruction": "Go around the table and chairs on the right, past the speaker and table and chairs, to the window",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.08.457.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.08.958.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.09.460.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.07.458.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.06.469.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.972.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.472.png"
    }
  },
  {
    "instruction": "Go around the table and chairs on the right, past the speaker and table and chairs, to the window",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.08.958.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.09.460.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.09.964.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.10.972.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.07.458.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.06.956.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.07.957.png"
    }
  },
  {
    "instruction": "From your current position, walk to the table and chairs at the diagonal corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.11.969.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.470.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.964.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.16.012.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.16.503.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.967.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.13.975.png"
    }
  },
  {
    "instruction": "From your current position, walk to the table and chairs at the diagonal corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.470.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.964.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.13.468.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.16.012.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.517.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.967.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.15.519.png"
    }
  },
  {
    "instruction": "From your current position, walk to the table and chairs at the diagonal corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.964.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.13.468.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.13.975.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.967.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.15.519.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.11.969.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.470.png"
    }
  },
  {
    "instruction": "From your current position, walk to the table and chairs at the diagonal corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.13.468.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.13.975.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.517.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.15.519.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.964.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.16.012.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.967.png"
    }
  },
  {
    "instruction": "From your current position, walk to the table and chairs at the diagonal corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.13.975.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.517.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.967.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.470.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.11.969.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.16.012.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.964.png"
    }
  },
  {
    "instruction": "From your current position, walk to the table and chairs at the diagonal corner of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.517.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.14.967.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.15.519.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.16.503.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.12.470.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.13.975.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.16.012.png"
    }
  },
  {
    "instruction": "Go from the front table to the small table in the center.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.21.001.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.21.518.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.22.025.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.24.012.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.23.510.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.22.511.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.23.005.png"
    }
  },
  {
    "instruction": "Go from the front table to the small table in the center.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.21.518.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.22.025.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.22.511.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.23.005.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.24.508.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.24.012.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.23.510.png"
    }
  },
  {
    "instruction": "Go from the front table to the small table in the center.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.22.025.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.22.511.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.23.005.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.24.508.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.24.012.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.21.001.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.23.510.png"
    }
  },
  {
    "instruction": "Go from the front table to the small table in the center.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.22.511.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.23.005.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.23.510.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.24.508.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.22.025.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.24.012.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/5/2025-09-12_16.48.21.518.png"
    }
  },
  {
    "instruction": "Go past the Violent Bear decoration, cabinet, and bed to the desk and chair on the left front.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.413.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.915.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.415.png"
    }
  },
  {
    "instruction": "Go past the Violent Bear decoration, cabinet, and bed to the desk and chair on the left front.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.915.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.413.png"
    }
  },
  {
    "instruction": "Go past the Violent Bear decoration, cabinet, and bed to the desk and chair on the left front.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.413.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.908.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png"
    }
  },
  {
    "instruction": "Go past the Violent Bear decoration, cabinet, and bed to the desk and chair on the left front.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.415.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.915.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.909.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png"
    }
  },
  {
    "instruction": "Go past the Violent Bear decoration, cabinet, and bed to the desk and chair on the left front.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.909.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.415.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png"
    }
  },
  {
    "instruction": "Go past the Violent Bear decoration, cabinet, and bed to the desk and chair on the left front.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.909.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.415.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.39.412.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.908.png"
    }
  },
  {
    "instruction": "Go past the Violent Bear decoration, cabinet, and bed to the desk and chair on the left front.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.909.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.415.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.413.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.39.412.png"
    }
  },
  {
    "instruction": "From the desk, turn left to the nightstand between the two beds.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.413.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.915.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.39.412.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.909.png"
    }
  },
  {
    "instruction": "From the desk, turn left to the nightstand between the two beds.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.915.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.39.412.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.909.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png"
    }
  },
  {
    "instruction": "From the desk, turn left to the nightstand between the two beds.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.39.412.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.413.png"
    }
  },
  {
    "instruction": "From the desk, turn left to the nightstand between the two beds.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.909.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.908.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.415.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.34.413.png"
    }
  },
  {
    "instruction": "From the desk, turn left to the nightstand between the two beds.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.415.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.908.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png"
    }
  },
  {
    "instruction": "From the desk, turn left to the nightstand between the two beds.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.909.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.908.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.415.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.415.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.35.912.png"
    }
  },
  {
    "instruction": "From the desk, turn left to the nightstand between the two beds.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.418.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.37.909.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.415.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.39.412.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.417.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.38.908.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.36.917.png"
    }
  },
  {
    "instruction": "From between the two beds, pass the Violent Bear, turn left to the corner table and chairs.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.44.931.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.45.424.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.45.929.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.46.422.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.47.923.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.48.428.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.46.921.png"
    }
  },
  {
    "instruction": "From between the two beds, pass the Violent Bear, turn left to the corner table and chairs.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.45.424.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.45.929.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.46.422.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.47.923.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.46.921.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.44.931.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.47.423.png"
    }
  },
  {
    "instruction": "From between the two beds, pass the Violent Bear, turn left to the corner table and chairs.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.45.929.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.46.422.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.46.921.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.47.923.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.45.424.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.48.428.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.44.931.png"
    }
  },
  {
    "instruction": "From between the two beds, pass the Violent Bear, turn left to the corner table and chairs.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.46.422.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.46.921.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.47.423.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.47.923.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.48.428.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.45.929.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.45.424.png"
    }
  },
  {
    "instruction": "Go around the bed to the left cabinet by the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.55.478.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.55.928.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.56.425.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.58.469.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.57.971.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.57.470.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.58.930.png"
    }
  },
  {
    "instruction": "Go around the bed to the left cabinet by the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.55.928.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.56.425.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.56.979.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.57.470.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.55.478.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.58.469.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.57.971.png"
    }
  },
  {
    "instruction": "Go around the bed to the left cabinet by the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.56.425.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.56.979.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.57.470.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.55.928.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.55.478.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.58.469.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.58.930.png"
    }
  },
  {
    "instruction": "Go around the bed to the left cabinet by the window.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.56.979.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.57.470.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.57.971.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.56.425.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.58.930.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.58.469.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/6/2025-09-12_16.59.55.928.png"
    }
  },
  {
    "instruction": "Go from the chair in front to the table and chairs at the far end of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.26.544.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.27.052.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.27.546.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.28.044.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.28.546.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.32.050.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.546.png"
    }
  },
  {
    "instruction": "Go from the chair in front to the table and chairs at the far end of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.27.052.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.27.546.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.28.044.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.26.544.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.32.050.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.31.551.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.046.png"
    }
  },
  {
    "instruction": "Go from the chair in front to the table and chairs at the far end of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.27.546.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.28.044.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.28.546.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.26.544.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.049.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.546.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.31.551.png"
    }
  },
  {
    "instruction": "Go from the chair in front to the table and chairs at the far end of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.28.044.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.28.546.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.046.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.31.551.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.049.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.553.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.27.052.png"
    }
  },
  {
    "instruction": "Go from the chair in front to the table and chairs at the far end of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.28.546.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.046.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.546.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.31.048.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.26.544.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.553.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.31.551.png"
    }
  },
  {
    "instruction": "Go from the chair in front to the table and chairs at the far end of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.046.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.546.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.049.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.553.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.31.048.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.31.551.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.28.546.png"
    }
  },
  {
    "instruction": "Go from the chair in front to the table and chairs at the far end of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.546.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.049.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.553.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.31.551.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.26.544.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.27.546.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.046.png"
    }
  },
  {
    "instruction": "Go from the chair in front to the table and chairs at the far end of the room.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.049.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.30.553.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.31.048.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.26.544.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.046.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.29.546.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/7/2025-09-12_17.10.32.050.png"
    }
  },
  {
    "instruction": "From your current position, go around the bookshelf and walk to the table behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.25.582.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.26.079.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.26.587.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.30.079.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.27.591.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.29.081.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.28.589.png"
    }
  },
  {
    "instruction": "From your current position, go around the bookshelf and walk to the table behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.26.079.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.26.587.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.27.081.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.29.081.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.29.583.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.28.088.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.27.591.png"
    }
  },
  {
    "instruction": "From your current position, go around the bookshelf and walk to the table behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.26.587.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.27.081.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.27.591.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.29.081.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.28.589.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.25.582.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.26.079.png"
    }
  },
  {
    "instruction": "From your current position, go around the bookshelf and walk to the table behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.27.081.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.27.591.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.28.088.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.30.079.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.28.589.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.26.079.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.29.081.png"
    }
  },
  {
    "instruction": "From your current position, go around the bookshelf and walk to the table behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.27.591.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.28.088.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.28.589.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.26.587.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.29.583.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.25.582.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.27.081.png"
    }
  },
  {
    "instruction": "From your current position, go around the bookshelf and walk to the table behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.28.088.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.28.589.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.29.081.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.30.079.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.26.587.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.29.583.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.25.582.png"
    }
  },
  {
    "instruction": "Turn right from the table, go around the bookshelf to the guitar behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.30.580.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.31.082.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.31.591.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.583.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.34.582.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.34.090.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.35.080.png"
    }
  },
  {
    "instruction": "Turn right from the table, go around the bookshelf to the guitar behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.31.082.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.31.591.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.089.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.583.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.33.083.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.33.587.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.34.090.png"
    }
  },
  {
    "instruction": "Turn right from the table, go around the bookshelf to the guitar behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.31.591.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.089.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.583.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.33.587.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.33.083.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.34.090.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.35.080.png"
    }
  },
  {
    "instruction": "Turn right from the table, go around the bookshelf to the guitar behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.089.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.583.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.33.083.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.34.582.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.34.090.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.30.580.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.35.080.png"
    }
  },
  {
    "instruction": "Turn right from the table, go around the bookshelf to the guitar behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.583.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.33.083.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.33.587.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.31.591.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.30.580.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.34.582.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.089.png"
    }
  },
  {
    "instruction": "Turn right from the table, go around the bookshelf to the guitar behind it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.33.083.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.33.587.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.34.090.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.35.080.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.089.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.30.580.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/8/2025-09-12_21.39.32.583.png"
    }
  },
  {
    "instruction": "Walk around the table on your left, pass by the bed, go around the first bookshelf, and keep going until you see the computer desk with a computer. Stop at the bookshelf next to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.55.570.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.56.075.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.56.565.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.59.068.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.567.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.59.572.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.074.png"
    }
  },
  {
    "instruction": "Walk around the table on your left, pass by the bed, go around the first bookshelf, and keep going until you see the computer desk with a computer. Stop at the bookshelf next to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.56.075.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.56.565.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.074.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.00.070.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.59.068.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.567.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.58.067.png"
    }
  },
  {
    "instruction": "Walk around the table on your left, pass by the bed, go around the first bookshelf, and keep going until you see the computer desk with a computer. Stop at the bookshelf next to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.56.565.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.074.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.567.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.56.075.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.00.070.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.59.068.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.58.565.png"
    }
  },
  {
    "instruction": "Walk around the table on your left, pass by the bed, go around the first bookshelf, and keep going until you see the computer desk with a computer. Stop at the bookshelf next to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.074.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.567.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.58.067.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.59.068.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.59.572.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.56.075.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.00.070.png"
    }
  },
  {
    "instruction": "Walk around the table on your left, pass by the bed, go around the first bookshelf, and keep going until you see the computer desk with a computer. Stop at the bookshelf next to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.567.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.58.067.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.58.565.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.074.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.59.572.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.59.068.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.55.570.png"
    }
  },
  {
    "instruction": "Walk around the table on your left, pass by the bed, go around the first bookshelf, and keep going until you see the computer desk with a computer. Stop at the bookshelf next to the desk.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.58.067.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.58.565.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.59.068.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.56.565.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.074.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.53.57.567.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.00.070.png"
    }
  },
  {
    "instruction": "Walk around the bookcase in front of you, pass by the bed and the mirror, and stop at the desk by the corner of the wall.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.00.568.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.01.066.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.01.570.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.075.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.572.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.069.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.02.571.png"
    }
  },
  {
    "instruction": "Walk around the bookcase in front of you, pass by the bed and the mirror, and stop at the desk by the corner of the wall.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.01.066.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.01.570.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.02.076.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.071.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.069.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.623.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.575.png"
    }
  },
  {
    "instruction": "Walk around the bookcase in front of you, pass by the bed and the mirror, and stop at the desk by the corner of the wall.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.01.570.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.02.076.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.02.571.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.623.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.075.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.00.568.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.575.png"
    }
  },
  {
    "instruction": "Walk around the bookcase in front of you, pass by the bed and the mirror, and stop at the desk by the corner of the wall.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.02.076.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.02.571.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.069.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.06.071.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.575.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.075.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.071.png"
    }
  },
  {
    "instruction": "Walk around the bookcase in front of you, pass by the bed and the mirror, and stop at the desk by the corner of the wall.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.02.571.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.069.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.623.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.02.076.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.572.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.06.071.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.071.png"
    }
  },
  {
    "instruction": "Walk around the bookcase in front of you, pass by the bed and the mirror, and stop at the desk by the corner of the wall.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.069.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.623.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.075.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.01.570.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.575.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.572.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.071.png"
    }
  },
  {
    "instruction": "Walk around the bookcase in front of you, pass by the bed and the mirror, and stop at the desk by the corner of the wall.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.623.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.075.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.572.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.575.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.071.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.06.071.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.03.069.png"
    }
  },
  {
    "instruction": "Walk around the bookcase in front of you, pass by the bed and the mirror, and stop at the desk by the corner of the wall.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.075.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.04.572.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.071.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.02.076.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.01.570.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.05.575.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.06.071.png"
    }
  },
  {
    "instruction": "Walk past the chair on your right, go around the bed, and stop at the computer.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.06.571.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.07.072.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.07.620.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.10.119.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.08.119.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.08.619.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.10.615.png"
    }
  },
  {
    "instruction": "Walk past the chair on your right, go around the bed, and stop at the computer.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.07.072.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.07.620.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.08.119.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.10.119.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.08.619.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.10.615.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.09.076.png"
    }
  },
  {
    "instruction": "Walk past the chair on your right, go around the bed, and stop at the computer.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.07.620.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.08.119.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.08.619.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.10.119.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.09.574.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.06.571.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.09.076.png"
    }
  },
  {
    "instruction": "Walk past the chair on your right, go around the bed, and stop at the computer.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.08.119.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.08.619.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.09.076.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.07.620.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.07.072.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.09.574.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.10.119.png"
    }
  },
  {
    "instruction": "Walk past the chair on your right, go around the bed, and stop at the computer.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.08.619.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.09.076.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.09.574.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.07.620.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.10.119.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.10.615.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.07.072.png"
    }
  },
  {
    "instruction": "Walk around the bed and stop at the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.15.114.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.15.621.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.16.119.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.16.625.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.17.122.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.17.614.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.18.124.png"
    }
  },
  {
    "instruction": "Walk around the bed and stop at the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.15.621.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.16.119.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.16.625.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.19.118.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.15.114.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.18.623.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.17.614.png"
    }
  },
  {
    "instruction": "Walk around the bed and stop at the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.16.119.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.16.625.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.17.122.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.15.114.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.19.118.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.18.124.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.17.614.png"
    }
  },
  {
    "instruction": "Walk around the bed and stop at the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.16.625.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.17.122.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.17.614.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.18.124.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.18.623.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.19.118.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.15.621.png"
    }
  },
  {
    "instruction": "Walk around the bed and stop at the bookshelf.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.17.122.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.17.614.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.18.124.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.19.118.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.15.114.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.16.119.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.18.623.png"
    }
  },
  {
    "instruction": "From the desk and chair on your right, walk past the bed toward the mirror and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "B",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.24.119.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.24.615.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.25.115.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.25.621.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.26.124.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.27.624.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.27.124.png"
    }
  },
  {
    "instruction": "From the desk and chair on your right, walk past the bed toward the mirror and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.24.615.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.25.115.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.25.621.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.26.614.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.24.119.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.27.124.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.26.124.png"
    }
  },
  {
    "instruction": "From the desk and chair on your right, walk past the bed toward the mirror and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.25.115.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.25.621.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.26.124.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.27.624.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.24.615.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.24.119.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.27.124.png"
    }
  },
  {
    "instruction": "From the desk and chair on your right, walk past the bed toward the mirror and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.25.621.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.26.124.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.26.614.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.25.115.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.24.119.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.27.624.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.24.615.png"
    }
  },
  {
    "instruction": "Slightly turn left from where you are, head toward the mirror on the wall, and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.30.625.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.120.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.614.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.34.119.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.34.615.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.35.125.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.32.619.png"
    }
  },
  {
    "instruction": "Slightly turn left from where you are, head toward the mirror on the wall, and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.120.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.614.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.32.118.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.33.117.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.34.615.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.30.625.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.32.619.png"
    }
  },
  {
    "instruction": "Slightly turn left from where you are, head toward the mirror on the wall, and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.614.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.32.118.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.32.619.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.34.615.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.120.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.35.125.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.33.617.png"
    }
  },
  {
    "instruction": "Slightly turn left from where you are, head toward the mirror on the wall, and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "D",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.32.118.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.32.619.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.33.117.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.34.615.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.35.125.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.614.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.34.119.png"
    }
  },
  {
    "instruction": "Slightly turn left from where you are, head toward the mirror on the wall, and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "A",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.32.619.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.33.117.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.33.617.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.34.615.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.120.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.30.625.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.34.119.png"
    }
  },
  {
    "instruction": "Slightly turn left from where you are, head toward the mirror on the wall, and stop next to it.",
    "input": "The historical images are as follows:\n<img_hist0>\n<img_hist1>\n<img_hist2>\nAnd the choices are as follows: <img_cand0>, <img_cand1>, <img_cand2>, <img_cand3>",
    "output": "C",
    "images": {
      "img_hist0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.33.117.png",
      "img_hist1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.33.617.png",
      "img_hist2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.34.119.png",
      "img_cand0": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.32.619.png",
      "img_cand1": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.120.png",
      "img_cand2": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.35.125.png",
      "img_cand3": "/mnt/shared-storage-user/tanxin/zhanglechao/mc_dataset/extend_vln_data/9/2025-09-12_21.54.31.614.png"
    }
  }
]