[
  {
    "video_id": "00348",
    "task": "Video-driven Audio Hallucination",
    "text": "Is the man making sound in the audio?",
    "label": "Yes"
  },
  {
    "video_id": "00348",
    "task": "Video-driven Audio Hallucination",
    "text": "Is the crowd making sound in the audio?",
    "label": "Yes"
  },
  {
    "video_id": "00348",
    "task": "Video-driven Audio Hallucination",
    "text": "Is the screen making sound in the audio?",
    "label": "No"
  },
  {
    "video_id": "00348",
    "task": "Video-driven Audio Hallucination",
    "text": "Is the stage making sound in the audio?",
    "label": "No"
  },
  {
    "video_id": "00348",
    "task": "Audio-driven Video Hallucination",
    "text": "Is the man visible in the video?",
    "label": "Yes"
  },
  {
    "video_id": "00348",
    "task": "Audio-driven Video Hallucination",
    "text": "Is the crowd visible in the video?",
    "label": "Yes"
  },
  {
    "video_id": "00348",
    "task": "Audio-driven Video Hallucination",
    "text": "Is the bell visible in the video?",
    "label": "No"
  },
  {
    "video_id": "00348",
    "task": "Audio-driven Video Hallucination",
    "text": "Is the cat visible in the video?",
    "label": "No"
  },
  {
    "video_id": "00348",
    "task": "AV Matching",
    "text": "Are the contexts of audio and visual content matching?",
    "label": "Yes"
  },
  {
    "video_id": "00348",
    "task": "AV Captioning",
    "text": "Describe what you see and hear in a single sentence.",
    "label": "On stage at an event, a man and a woman engage in conversation, a bell rings, and a cat meows, and a crowd applauds."
  },
  {
    "video_id": "00348_swap",
    "task": "AV Matching",
    "text": "Are the contexts of audio and visual content matching?",
    "label": "No"
  }
]