[
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-0BIyqJj9ZU_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-0BIyqJj9ZU_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is people belly laughing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-0UuUoXQUoI_000107.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0welW-8hB1c_000071.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing double bass."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-2-wdcN5vOw_000017.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-2-wdcN5vOw_000017.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is train whistling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-23CeprtibU_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-cA9HsnV1ao_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as chainsawing trees."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-2Dm0VjW8oM_000001.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DZFP5hm7iKg_000161.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as pigeon, dove cooing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-2sOH8XovEE_000484.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/A_0xhMEZ1Cg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing table tennis."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-3Kv4fdm7Uk_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2YJpjSldLtg_000495.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing steelpan."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-4bPiXbovf0_000008.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7T04388Ijk8_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by air conditioning noise."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-5CGQGSFGyg_000060.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-5CGQGSFGyg_000060.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing electronic organ."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-5z9IXBJefc_000227.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0sT6vnChbrc_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing djembe."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-79qo5MUYBk_000207.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-79qo5MUYBk_000207.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is train whistling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-7TanrCbmME_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-7TanrCbmME_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as child singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-7sg--aJdrc_000049.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-r2-9oyIzkQ_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as gibbon howling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-7tYmeOmsRg_000058.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-7tYmeOmsRg_000058.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as people eating crisps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-7tYmeOmsRg_000180.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0welW-8hB1c_000071.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is people eating crisps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-86gl1hp1Aw_000105.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DYdalOQnx1Y_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by parrot talking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-8cgbhIR_pw_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3gR0QBgrzYQ_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing acoustic guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-A3zsFeU_OI_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3GtKbvwaycY_000025.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as people sniggering."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ANxUxvGASw_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ANxUxvGASw_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from subway, metro, underground."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ByoSbgzr4M_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ByoSbgzr4M_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as tapping guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-CZ1LIc8aos_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-CZ1LIc8aos_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is car passing by."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Cr0WQoFQQs_000045.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Cr0WQoFQQs_000045.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is elk bugling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Cv3gOXEoxA_000040.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0mNvPaqfwUI_000220.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as fox barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-D64b_8YJK4_000046.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-D64b_8YJK4_000046.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is barn swallow calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-E5o64ACjm0_000017.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-E5o64ACjm0_000017.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as lathe spinning."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-FoTxwPOz3U_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-FoTxwPOz3U_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by child speech, kid speaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-GOaBCyC5Js_000087.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7fLnFGd79-s_000018.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing cornet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-GW1J75oAKU_000304.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-GW1J75oAKU_000304.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing violin, fiddle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-HMEhGV38GM_000216.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-wnDAPcoPsk_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing congas."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-IKnJa9U66I_000150.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-JUBdOr8Hes_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is using sewing machines."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Igq1W20Gi8_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Z1ZSWDouUU_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from people whistling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-JGpAlLrSD0_000458.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-JGpAlLrSD0_000458.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as train whistling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-JUBdOr8Hes_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7QnyKZe6VBA_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing accordion."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-JUhUI_KvUI_000026.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4qaiLMEce6Y_000025.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as gibbon howling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-K-ccLMFE5M_000259.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-K-ccLMFE5M_000259.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by people marching."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-K1BRF6qng8_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-K1BRF6qng8_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from female singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-KBH8YmHR-0_000050.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GXRooshOGuc_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from snake rattling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Lmibx_Iu_E_000173.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Lmibx_Iu_E_000173.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from tractor digging."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Lv13WPa4xk_000170.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Lv13WPa4xk_000170.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by airplane flyby."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-N00SskHxS4_000529.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-N00SskHxS4_000529.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing bugle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-NCa3eFLULw_000221.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GSSWxAo_oyo_000011.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by dinosaurs bellowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-NwBdmVSg1w_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AgtY6m-b3Gk_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-OAyRsvFGgc_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7QnyKZe6VBA_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as tapping guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-OVb-UG8yJw_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/00QQLLcny14_000083.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is engine accelerating, revving, vroom."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-PvlrLjIAYo_000009.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4ep09nZl3LA_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from parrot talking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Q3MX7Tgx_E_000111.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4YHYnWM_jMQ_000083.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-RBs9pPhHY8_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4_cfXx3iLes_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by fire truck siren."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-S-TDT5oq0Q_000290.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AZfD5KrH5d8_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as cattle mooing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-SWaCArvQug_000021.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AhlnqK6ONro_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is opening or closing drawers."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Vo4CAMX26U_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/74p3DLeDCHE_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing didgeridoo."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-W3WpZvJX2o_000027.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/253aGgk7NSE_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by bathroom ventilation fan running."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Wd5YV97ftU_000320.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3uuyQ4O0L68_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by skidding."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-XwtCzUaN1I_000160.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-XwtCzUaN1I_000160.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing cymbal."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Y-UegG9tLw_000282.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1xyVkQJ8A54_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing cornet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-YwPSwhe2jo_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-YwPSwhe2jo_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is people shuffling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-Z1ZSWDouUU_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/38F6eeIR-s0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from motorboat, speedboat acceleration."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ZJqu_4zLMc_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3QEd9nR_p7w_000018.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from cat meowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ZSgg6jFUd8_000688.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ZSgg6jFUd8_000688.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is people eating crisps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-_QPd-VskKY_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-_QPd-VskKY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from mouse squeaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-_umX4zgLVY_000028.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-r2-9oyIzkQ_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is lions growling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-aK9JKAGme0_000041.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-aK9JKAGme0_000041.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is cheetah chirrup."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-aK9JKAGme0_000051.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-aK9JKAGme0_000051.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as cheetah chirrup."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-aUJLRXkj-Y_000118.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-aUJLRXkj-Y_000118.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from children shouting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-cA9HsnV1ao_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1nUqhH8bAPk_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from hair dryer drying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-d1KR2BSfHM_000065.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-d1KR2BSfHM_000065.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as squishing water."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-e4wXAy1iVo_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/ATnl-fq5nAk_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing ukulele."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-fAVezaAX18_000126.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0NLXz4JgvcQ_000096.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing drum kit."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-fJsZm3YRc0_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2KEg1a42Wx0_000288.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-fPdOa99Iwg_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AGDQXlgdzhA_000285.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is people sniggering."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-fTfRh0_RQ4_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0Iy18mslB4A_000170.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is car passing by."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-g-GjgEq8l4_000017.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DcMUa81JfBE_000032.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is yodelling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-gCqnkIUmp0_000140.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-gCqnkIUmp0_000140.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as people babbling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-gSfPQqi6nI_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-gSfPQqi6nI_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from cat meowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-hYRFCQdbLg_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AgtY6m-b3Gk_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing steelpan."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-hedbpc8T0E_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AIDJFkDURPY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from people babbling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-hss7xpzIVc_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-hss7xpzIVc_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from male singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-iF4a6f5PJ8_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-oOARU0JwWE_000153.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by pheasant crowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-iF4a6f5PJ8_000060.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AcLX-YyZE08_000167.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as pheasant crowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-jZENGDFArw_000080.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-jZENGDFArw_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by child speech, kid speaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-kPDH9n0PG4_000310.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-kPDH9n0PG4_000310.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as railroad car, train wagon."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-kZVoaYYU6o_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-kZVoaYYU6o_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is driving buses."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-lPXTBXa0tE_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-lPXTBXa0tE_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is tapping guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-mKtgDnG0oM_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-mKtgDnG0oM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is female singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-nQ9a0P1TlY_000023.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-z4OZ7ls5Bo_000540.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is child singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-oGJwGEsIiA_000090.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3-PFuDkTM48_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as splashing water."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-oOARU0JwWE_000153.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2YW7WBtqzzQ_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from people humming."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-oSzD8P2BtU_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1ACn3u5UnBw_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ocADGlyaHc_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/16CvcIXIjzQ_000332.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is helicopter."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-r2-9oyIzkQ_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-r2-9oyIzkQ_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by male singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-r3nM90RCNs_000161.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-r3nM90RCNs_000161.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from sharpen knife."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-rTTPoBJNI4_000123.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-rTTPoBJNI4_000123.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing tambourine."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-rb6uia1wSo_000190.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-rb6uia1wSo_000190.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing clarinet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-rqhMzJRYoc_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7gTiMEazgmE_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as people hiccup."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-t-htrAtNvM_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-t-htrAtNvM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-u-40BIU1HE_000003.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2j4dsRMuj4Y_000079.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-uD3ZbhFTnk_000077.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/31GOxPXDNkk_000068.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from baltimore oriole calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-vAtBe4LMCQ_000063.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0welW-8hB1c_000071.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as people eating crisps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-wi8kPVJLcw_000205.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-wi8kPVJLcw_000205.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from air conditioning noise."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-wnDAPcoPsk_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-wnDAPcoPsk_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing hammond organ."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-xQbyezhw_k_000040.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/448X3t13rlk_000325.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is hair dryer drying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-yicwYUKKuo_000304.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-yicwYUKKuo_000304.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by airplane flyby."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-z3AAq0FK-0_000001.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-z3AAq0FK-0_000001.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by snake rattling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-z4OZ7ls5Bo_000540.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-z4OZ7ls5Bo_000540.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing ukulele."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-zCtwbk005g_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-zCtwbk005g_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is cat meowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-zSyfcXmHdk_001083.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-zSyfcXmHdk_001083.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is ice cream truck, ice cream van."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-zgGL2o1jqw_000080.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-zgGL2o1jqw_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is toilet flushing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ziWHyk_fYQ_000106.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-ziWHyk_fYQ_000106.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by civil defense siren."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0-jDld11jhw_000093.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/A7DQjMwCtI8_000270.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing cornet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/007P6bFgRCU_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/007P6bFgRCU_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing trumpet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/00QQLLcny14_000083.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/00QQLLcny14_000083.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by electric shaver, electric razor shaving."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/00wORCOKNHw_000014.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/00wORCOKNHw_000014.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from horse clip-clop."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/00z0M1DJyAw_000090.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/D3BJuOwltoI_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is turkey gobbling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/01SaXyGY5SM_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/01SaXyGY5SM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by cat caterwauling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/02UvvE1oA1I_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7Gua0-UrKIw_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by goat bleating."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/030h_KwNm-g_000003.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/030h_KwNm-g_000003.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from sheep bleating."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/04QV0Xi5YTk_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Vu6HtlYio4_000219.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing zither."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/052VMqTM4Xs_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mDPQ_CPopw_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is donkey, ass braying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/064Ilsz8Fzg_000051.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/064Ilsz8Fzg_000051.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by car engine starting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/06si40RVDco_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4Q1DDOpej1o_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is driving motorcycle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/076qdKSC8nU_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/076qdKSC8nU_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by tapping guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/077aWlQn6XI_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/A_0xhMEZ1Cg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/09NfjFeq2cs_000078.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/09NfjFeq2cs_000078.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as francolin calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/09pZj6CEvRw_000250.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1yKGjTOk7NY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is church bell ringing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0CEsVHHkwww_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0CEsVHHkwww_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as driving snowmobile."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0CzC0-cXe8M_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0CzC0-cXe8M_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as lawn mowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0Dt2-TtqvJU_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0Dt2-TtqvJU_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from child singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0E6KS8B6DMc_000540.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0E6KS8B6DMc_000540.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing bass drum."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0EOQco76eXQ_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AgLNBaSMRBA_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing bass guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0F04c_rY4aw_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/Gg6Jx9UTBZI_000158.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as dog howling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0FEd3Pd-ksw_000397.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0FEd3Pd-ksw_000397.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from car engine starting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0FHSJnza9P8_000046.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7OqOXtp8_tk_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from barn swallow calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0G50t4FlbIA_000060.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AIDJFkDURPY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as people screaming."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0H814XXq-WQ_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0H814XXq-WQ_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is people crowd."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0HBJuYt2Eys_000032.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0HBJuYt2Eys_000032.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as disc scratching."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0HXYdGGKV2k_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0HXYdGGKV2k_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing banjo."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0IqPUUWnnd8_000085.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0IqPUUWnnd8_000085.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing glockenspiel."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0Iy18mslB4A_000170.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GXRooshOGuc_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing flute."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0IzhjqyDzt8_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DHoVXJe-MXY_000293.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as church bell ringing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0JkiTl-4B5E_000017.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0JkiTl-4B5E_000017.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing oboe."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0K92tp-4GEk_000108.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-aK9JKAGme0_000051.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from bathroom ventilation fan running."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0LwSy-xZSPw_000112.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4WsPXawPBF8_000005.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as male singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0MU2SG5ME1E_000016.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2IEoe3YvCJo_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by fox barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0N0C0Wbe6AI_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0N0C0Wbe6AI_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by police car (siren)."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0NIE-eDk92M_000029.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0NIE-eDk92M_000029.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing congas."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0NLXz4JgvcQ_000096.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2REf8hLLYMw_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is people eating crisps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0NQXsEwBqAk_000136.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0NQXsEwBqAk_000136.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by tractor digging."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0O-gZoirpRA_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DUlxiX4ri1Q_000045.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0OH28bnOdEE_000058.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4IJCSuJoo9o_000002.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as people gargling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0OLYyjqNdn4_000352.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0OLYyjqNdn4_000352.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as train whistling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0Olm321vgk8_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2EYGJpU7juM_000130.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by male singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0PHUQku2P5I_000050.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2EYGJpU7juM_000130.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as singing choir."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0QSPtpp1HN8_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0QSPtpp1HN8_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing hammond organ."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0QabiRBcEKc_000050.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0QabiRBcEKc_000050.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is male speech, man speaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0RZRFj7zDnQ_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0RZRFj7zDnQ_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing cello."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0Rqa18lmDtw_000153.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2vycuIKwnnA_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as driving motorcycle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0T966F09Q_8_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0T966F09Q_8_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing oboe."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0TSOlaZXXM8_000080.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0TSOlaZXXM8_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing bass guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0U9_6RyH3_M_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AgVUGzrzJ20_000320.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from mouse squeaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0UF1FHDPM1A_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0UF1FHDPM1A_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as people sniggering."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0UeEkqkZmvo_000160.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0UeEkqkZmvo_000160.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing washboard."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0W1En_XsDnM_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AK_xkz5nfIM_000089.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is people burping."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0W_wPc-zV3I_000101.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0welW-8hB1c_000071.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from hedge trimmer running."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0Wkk9iNzA9c_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4WH0ZxvF9Nw_000003.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as dog growling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0Wxl_LzqguI_000288.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0Wxl_LzqguI_000288.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is people battle cry."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0XDEooC1h7s_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4cgDBRTdmq0_000221.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as cow lowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0YTieIiZNN4_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-r3nM90RCNs_000161.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as machine gun shooting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0ZxpQ87jdgQ_000037.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3KbSLoAAzy0_000069.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is cow lowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0_9-dbUW0hU_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0_9-dbUW0hU_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing theremin."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0_B2YnBAz3A_000220.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3sYnYraOuIo_000510.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by people sneezing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0apGJGFHQO4_000067.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0apGJGFHQO4_000067.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing violin, fiddle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0bBJkY4_zs8_000033.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7SKRc8oyLvE_000009.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is elk bugling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0bobzCB2ObQ_000076.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2j4dsRMuj4Y_000079.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from tractor digging."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0eA2QcSRgh0_000063.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0eA2QcSRgh0_000063.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from dog baying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0eDN0YmnJRY_000002.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3otUlQ4wvLY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by cheetah chirrup."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0g_-KI5NZ-E_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/A_oaLt-n4fQ_000220.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as people whistling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0glBUluF4Yk_000175.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0glBUluF4Yk_000175.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by fox barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0hfU27A6tus_000070.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0hfU27A6tus_000070.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing banjo."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0hjqlmdPT6g_000050.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/G7ZE5J9gHMQ_000150.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is car engine starting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0iX0QmnbrK4_000085.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0iX0QmnbrK4_000085.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing trombone."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0jy-sVREk4o_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0jy-sVREk4o_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by horse clip-clop."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0kQkVbOikzQ_000004.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0kQkVbOikzQ_000004.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by airplane flyby."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0m9-5BkL4Mc_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3njuN-F2Ecs_000332.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is child singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0mNvPaqfwUI_000220.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4ep09nZl3LA_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from railroad car, train wagon."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0n-Z2AQCRnU_000385.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/40sAH2ZB0Pg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by people eating noodle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0n56ajMNoMA_000118.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/Ggh5JPnDPNw_000070.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by driving snowmobile."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0nYltlo90Zc_000147.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2xQuWif8axE_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing congas."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0nYltlo90Zc_000161.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2xQuWif8axE_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing congas."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0nvBTp-q7tU_000112.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GVk8l4S9NN0_000039.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as driving snowmobile."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0oTdsarJHA4_000220.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0oTdsarJHA4_000220.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing trombone."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0olhNr566Z0_000018.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0olhNr566Z0_000018.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing trumpet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0q5kQGcOn2I_000083.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/49PCE6AXrGw_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from francolin calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0q6GOUn_dhU_000066.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0q6GOUn_dhU_000066.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing castanets."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0q9FUj_2qMc_000539.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0q9FUj_2qMc_000539.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by hedge trimmer running."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0sT6vnChbrc_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2PHV2xNjGVU_000126.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by cat growling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0slFEpnTKkY_000115.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0slFEpnTKkY_000115.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing steel guitar, slide guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0teMcpgy9Jo_000058.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0teMcpgy9Jo_000058.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from pheasant crowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0ti14lvd2Aw_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-XwtCzUaN1I_000160.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as pheasant crowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0w9PLY1owzk_000108.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AHIV0RMwnN8_000210.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is airplane."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0wdNFZhpdQw_000368.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GIvdJOuBLgI_000340.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by people eating crisps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0welW-8hB1c_000071.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1yWGmdevTuM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by black capped chickadee calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0wzsE67O5tE_000230.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/Gg6Jx9UTBZI_000158.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing ukulele."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0x9TnOQ5Nhw_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3p3A4QDXw-g_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as pigeon, dove cooing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0xPWkrfjMu0_000178.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0xPWkrfjMu0_000178.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is lip smacking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0yxJzCQW5zI_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0yxJzCQW5zI_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is people whispering."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/10YXuv9Go0E_000140.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AgLNBaSMRBA_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from skateboarding."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/10aBef0Ghkc_000040.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/10aBef0Ghkc_000040.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing piano."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/10aF24rMeu0_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2sKmbnOi5S0_000006.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/10hGfv89L3g_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/10hGfv89L3g_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as car passing by."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/115l3adqQ1M_000417.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/115l3adqQ1M_000417.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is driving motorcycle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/126AbihZt28_000009.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/126AbihZt28_000009.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from horse clip-clop."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/12T-9dLEbY8_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/12T-9dLEbY8_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is bird squawking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/130v5XJl8G0_000070.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2C4aXzGI2eI_000415.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from people coughing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/13W_9xdaOZE_000068.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/13W_9xdaOZE_000068.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is tap dancing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/14jcqHjMXcI_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/14jcqHjMXcI_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is horse clip-clop."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/14lBeX_dOHU_000234.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/74p3DLeDCHE_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by canary calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/14tRPoQA4q0_000004.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/14tRPoQA4q0_000004.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by baby laughter."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/15Mw2jyyHk0_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/15Mw2jyyHk0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing bass guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/15pi8h_bHQE_000173.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/15pi8h_bHQE_000173.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by cow lowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/169a2JSphHA_000269.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-7tYmeOmsRg_000058.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing double bass."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/16CvcIXIjzQ_000332.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/16CvcIXIjzQ_000332.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is chinchilla barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/17K-oEuH1p0_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2eDP3jKoUd4_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing bugle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/17WW_p1N7BQ_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/17WW_p1N7BQ_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from cat meowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/17yDXvJTdeo_000603.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1cwGW0cBdRs_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as people slurping."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/18212B4yfLg_000080.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/18212B4yfLg_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from lighting firecrackers."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/18r-Vtspi8g_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/18r-Vtspi8g_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from splashing water."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/190Zx0wVrhQ_000003.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1f-L1M-S6RQ_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/19r6_MQei4M_000119.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2PHV2xNjGVU_000126.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is dinosaurs bellowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1ACn3u5UnBw_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2wGvpfAkly0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from male singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1CZ-bybGzu4_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4_cfXx3iLes_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by people crowd."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1D6UsaTflJ8_000060.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DLMKy0_82iw_000150.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by bird wings flapping."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1ETrMAVZdY8_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1ETrMAVZdY8_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as bird chirping, tweeting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1ErGsKIlhAI_000051.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-JUBdOr8Hes_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is people humming."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1F3_bvIT0ig_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1F3_bvIT0ig_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as car passing by."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1F9zCsJyw6k_000230.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1eBzpkIY-TY_000198.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by police car (siren)."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1FWUnWVX5Xk_000159.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/ADjyDcauxzM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by baltimore oriole calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1FnA3w94zXI_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/13W_9xdaOZE_000068.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by tapping guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1GDAnA4hAqQ_000105.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2vYkvwD-fkc_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as airplane."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1HUCwslugK8_000283.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1HUCwslugK8_000283.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing glockenspiel."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1Isq_KJHriU_000016.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1Isq_KJHriU_000016.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from dog baying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1JY4rwQZn3s_000011.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1JY4rwQZn3s_000011.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is donkey, ass braying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1JsIcP2nXMw_000108.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4cgDBRTdmq0_000221.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as fox barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1MTKUN3uFrs_000440.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1MTKUN3uFrs_000440.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from fireworks banging."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1MaEJRh2oGY_000034.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1cxvg7qu0G0_000070.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is cheetah chirrup."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1MaEJRh2oGY_000089.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1MaEJRh2oGY_000089.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is cheetah chirrup."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1OFDyTzUj24_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3KbSLoAAzy0_000069.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by male speech, man speaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1P9yEzO1Bic_000025.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1P9yEzO1Bic_000025.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing erhu."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1PN-bfs2EhY_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7LWqCLxcNXc_000019.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is child singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1Qvce8w_Vds_000011.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7B_3t_ELwxA_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from people nose blowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1RhYdQnZ_hw_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3uLmjElob2Q_000004.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from female singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1SJFTbu1abo_000042.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1SJFTbu1abo_000042.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is canary calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1TAOyk0Xgss_000370.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GafN7X_ifI0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is vacuum cleaner cleaning floors."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1TJrceFMGrU_000114.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1TJrceFMGrU_000114.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as tractor digging."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1Tg9VvRZN5k_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1Tg9VvRZN5k_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is subway, metro, underground."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1Thl21awe2g_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1gWu-9hPzFs_000022.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as car engine knocking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1U8Y6VifI-M_000050.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/D30UC-C-lIs_000029.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is people sneezing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1UXkMZNI5o4_000373.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3KQxT20mY-k_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as wood thrush calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1V-_jMSJR6A_000024.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4cgDBRTdmq0_000221.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing trumpet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1VHm2PacRWc_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1VHm2PacRWc_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by cow lowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1WLMVX_Y2Pk_000018.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1WLMVX_Y2Pk_000018.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as elephant trumpeting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_0T2ZIjWnw_000031.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_0T2ZIjWnw_000031.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing violin, fiddle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_JeO9Pg1aQ_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_JeO9Pg1aQ_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from cricket chirping."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_Q80fDGLRM_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/20qZLse0acs_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from dog barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_v-Kro86Io_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4Vq6WDtxTt4_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing harpsichord."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_xoCNmTn3Y_000012.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_xoCNmTn3Y_000012.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as coyote howling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_yyvTRVoZQ_000002.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_yyvTRVoZQ_000002.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is alligators, crocodiles hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_yyvTRVoZQ_000209.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1_yyvTRVoZQ_000209.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as alligators, crocodiles hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1aDnyPubxdY_000118.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1aDnyPubxdY_000118.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from baby crying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1cqcTbDxsHM_000130.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3KQxT20mY-k_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing theremin."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1cwGW0cBdRs_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1cwGW0cBdRs_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing theremin."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1cxvg7qu0G0_000070.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1cxvg7qu0G0_000070.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from lighting firecrackers."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1dyFP4MGkIM_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1dyFP4MGkIM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as skidding."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1eBzpkIY-TY_000198.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GKPDhTCffNw_000292.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as snake hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1f-L1M-S6RQ_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2es7oZzwLWM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by engine accelerating, revving, vroom."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1g3n6Zaqid8_000064.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-K-ccLMFE5M_000259.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by black capped chickadee calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1gSwLBNvEWE_000034.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AIVt3e5EVtc_000070.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing oboe."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1gWu-9hPzFs_000022.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/20qZLse0acs_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as cheetah chirrup."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1hba9jmspEQ_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4RWpCBNrYSM_000174.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is female speech, woman speaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1hknEtsmdOc_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AKmkhvb9shk_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is car passing by."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1hoUEfsTmdE_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1hoUEfsTmdE_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from female singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1hwkxJOl1PY_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1hwkxJOl1PY_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is lions roaring."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1kTMiyc5eXg_000163.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1kTMiyc5eXg_000163.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as cricket chirping."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1kiR-BU9Fik_000071.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3F9Qszr4j1Q_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is lip smacking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1mkIuq3rjUg_000019.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Jpg_KvJWL0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing guiro."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1n6JyJt4kJ4_000018.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ke9wf3Ajkg_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from train horning."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1nUqhH8bAPk_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/20Nlj7Cz4E0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from child singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1oJAVJPX0YY_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1oJAVJPX0YY_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from railroad car, train wagon."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1obzCbmFpa8_000410.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1obzCbmFpa8_000410.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is people giggling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1p3NSKpuJDs_000144.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1p3NSKpuJDs_000144.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by child singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1p5jl5JeZx8_000117.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DIooS7FSHYk_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from people cheering."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1q8kkQhS97g_000140.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1q8kkQhS97g_000140.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing cymbal."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1rtXsGC-q3Q_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/G7Fr50rWppo_000040.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as engine accelerating, revving, vroom."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1sukRxLM77w_000021.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1sukRxLM77w_000021.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as hedge trimmer running."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1tC7bTilFWk_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7PK3nvCihMk_000011.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by cat caterwauling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1uCzQCdCC1U_000170.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1uCzQCdCC1U_000170.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is race car, auto racing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1uhuVsFGe6E_000050.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DFTZgruYPL4_000115.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by child speech, kid speaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1veIcvGPRYw_000003.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1veIcvGPRYw_000003.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as dog bow-wow."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1vrJPFfLMvA_000233.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4WH0ZxvF9Nw_000003.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing glockenspiel."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1vrR6Im_RDo_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1vrR6Im_RDo_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as female singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1vy-ZxTMQf4_000377.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1vy-ZxTMQf4_000377.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by people marching."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1woaZGPrY9g_000002.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3SyOlb_hSjg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from car passing by."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1xS-iGBR2b8_000014.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-zSyfcXmHdk_001083.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from donkey, ass braying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1xYpHbg0fEE_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0welW-8hB1c_000071.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from female singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1xyVkQJ8A54_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1xyVkQJ8A54_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by snake hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1yKGjTOk7NY_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/ANxHyEeVnV8_000026.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from child singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1yWGmdevTuM_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2QuZzkvq_YQ_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing didgeridoo."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1z19uJK6SB0_000076.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/18212B4yfLg_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing djembe."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1zQtgRqPHZU_000024.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/18212B4yfLg_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by bathroom ventilation fan running."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/20Nlj7Cz4E0_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/20Nlj7Cz4E0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by male singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/20qZLse0acs_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/20qZLse0acs_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from dog bow-wow."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/21I0YiLHzvg_000038.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/21I0YiLHzvg_000038.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from opening or closing car electric windows."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/24tWz7gmngI_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GLIXnXZEOxY_000090.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from typing on computer keyboard."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/253aGgk7NSE_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-zSyfcXmHdk_001083.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing bugle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/25PvbXDbZO8_000035.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3GtvLj8K1F4_000140.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is otter growling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/25QsgUI0x8U_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4q4ejLxTo8I_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by orchestra."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/26dqZUTv5os_000175.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/26dqZUTv5os_000175.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from forging swords."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/28StN9ykZ68_000094.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/28StN9ykZ68_000094.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is beat boxing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/28ehZa0fzmo_000028.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/28ehZa0fzmo_000028.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is fire truck siren."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/28p0DrP3KgI_000021.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4ZJrKmDGdA4_000140.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by electric grinder grinding."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/29MZ0dwOBOM_000112.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/28StN9ykZ68_000094.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from hedge trimmer running."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/29ZWMYIEq3g_000141.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/Ggh5JPnDPNw_000070.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as sharpen knife."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/29dwfsI1XlQ_000006.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3AzacBprTzU_000060.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ACsPTob5cY_000118.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ACsPTob5cY_000118.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is snake rattling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ACsPTob5cY_000349.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ACsPTob5cY_000349.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as snake rattling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ASyuuVhr6M_000052.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/D1VQAb8CfWM_000008.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2B-xGbzOjYY_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2B-xGbzOjYY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing banjo."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2BR2Ivk_EFA_000021.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2BR2Ivk_EFA_000021.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by owl hooting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Bljhdt61Y4_000038.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Bljhdt61Y4_000038.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing bassoon."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2C4aXzGI2eI_000415.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DDhKU_VH0Js_000228.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by fireworks banging."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2EYGJpU7juM_000130.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2EYGJpU7juM_000130.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing tympani."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2HZcxlRs-hg_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GbJ84ESfKOs_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2IEoe3YvCJo_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4Tpy1lsfcSM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is cat caterwauling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2INpPpv4NRg_000560.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/126AbihZt28_000009.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from using sewing machines."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ISY5z0QQfc_000101.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ISY5z0QQfc_000101.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing erhu."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Jpg_KvJWL0_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Jpg_KvJWL0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2JpvRGz5Dq0_000001.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2JpvRGz5Dq0_000001.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by snake rattling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2K5Px-N6BEI_000447.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AZfD5KrH5d8_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2KEg1a42Wx0_000288.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-aK9JKAGme0_000051.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2LRhkMH48_Q_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DQIwRVrlYqI_000159.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from motorboat, speedboat acceleration."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2M0i--pR-To_000037.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GVk8l4S9NN0_000039.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing bongo."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2MKPKDQcgVc_000036.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Wk4Y3_dMuc_000440.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as train wheels squealing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2NESqWU89V0_000338.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/Db6Hjt0x28k_000056.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing violin, fiddle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Oe5kao1ycY_000310.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Oe5kao1ycY_000310.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is subway, metro, underground."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2PHV2xNjGVU_000126.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2PHV2xNjGVU_000126.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is tap dancing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Pl5B1Tpn1U_000024.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Pl5B1Tpn1U_000024.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as electric grinder grinding."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2QuZzkvq_YQ_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2QuZzkvq_YQ_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as child speech, kid speaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2REf8hLLYMw_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3GtKbvwaycY_000025.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as cat meowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2RHarmH7DEk_000024.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2RHarmH7DEk_000024.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from woodpecker pecking tree."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2SchRwpsT7s_000048.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2SchRwpsT7s_000048.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from people battle cry."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Ssh4YWMroo_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Ssh4YWMroo_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by cat caterwauling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2T8OhRXZDTs_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DHoVXJe-MXY_000293.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing zither."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2TprtMP1RDo_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3otUlQ4wvLY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is people whispering."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2U5f3uTl7qg_000167.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2U5f3uTl7qg_000167.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing tambourine."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2UvzCdos0nc_000036.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2UvzCdos0nc_000036.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as dog baying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2VFVe0RCn7g_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2VFVe0RCn7g_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from tapping guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Vg5WTQZqwI_000022.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Vg5WTQZqwI_000022.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing double bass."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2VptepDtAZ4_000058.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0TSOlaZXXM8_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is pheasant crowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2VptepDtAZ4_000071.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2VptepDtAZ4_000071.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by pheasant crowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Vu6HtlYio4_000219.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GafN7X_ifI0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as snake rattling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2W4dw8XuetE_000050.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2W4dw8XuetE_000050.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as rowboat, canoe, kayak rowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Wk4Y3_dMuc_000440.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Wk4Y3_dMuc_000440.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by opening or closing drawers."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2YJpjSldLtg_000495.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2YJpjSldLtg_000495.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from driving snowmobile."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2YW7WBtqzzQ_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2YW7WBtqzzQ_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from people burping."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2YXcqoFlYFY_000160.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2YXcqoFlYFY_000160.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as fireworks banging."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ZNKLUHeJtg_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ZNKLUHeJtg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing cello."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ZZEtUCIkUw_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/16CvcIXIjzQ_000332.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by dog bow-wow."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ZogsGp-T4o_000040.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ZogsGp-T4o_000040.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing mandolin."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2_RltCGlGOA_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/Ah-IBgwj2lg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing piano."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2bCuw7U_Rac_000390.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GafN7X_ifI0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing accordion."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2bYyywE97aA_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2bYyywE97aA_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is vacuum cleaner cleaning floors."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2cEM6Hu90v0_000038.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GbI7rZa22Zs_000052.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing erhu."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2drXOn18U3Q_000136.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2drXOn18U3Q_000136.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from slot machine."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2eDP3jKoUd4_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2eDP3jKoUd4_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as owl hooting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2eYaXTKSpMI_000004.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1f-L1M-S6RQ_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as cat meowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2es7oZzwLWM_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2es7oZzwLWM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing zither."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2f7NnTVQzR0_000150.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2f7NnTVQzR0_000150.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as female singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ffgd7k0vI8_000040.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ffgd7k0vI8_000040.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by cap gun shooting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2g5hx_rynAI_000035.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DZFP5hm7iKg_000161.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing shofar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2j4dsRMuj4Y_000079.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2j4dsRMuj4Y_000079.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is bathroom ventilation fan running."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2kHcvpsu_QY_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2kHcvpsu_QY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing french horn."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2kQCMxf0onU_000036.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/ADjyDcauxzM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2kdJKmgMcxU_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2kdJKmgMcxU_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from cow lowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ke9wf3Ajkg_000080.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-kZVoaYYU6o_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as car engine knocking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2mQ1JOOs6LE_000054.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GIvdJOuBLgI_000340.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing congas."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2mw1UMcc_Ys_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2mw1UMcc_Ys_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by people babbling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2o4-lEtFb2k_000060.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2o4-lEtFb2k_000060.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by hair dryer drying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2p37j052q3o_000004.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GBLKj2d0iC4_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from alarm clock ringing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2qAlWm3NES8_000034.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2qAlWm3NES8_000034.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is tap dancing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2qHRJAQDCDo_000095.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2qHRJAQDCDo_000095.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by lathe spinning."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2rW2I6KmWhw_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/Gg6Jx9UTBZI_000158.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is dog howling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2rciuzaEEWo_000025.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2IEoe3YvCJo_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by chimpanzee pant-hooting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ro_sQ7_vUM_000024.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2ro_sQ7_vUM_000024.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2sKmbnOi5S0_000006.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3OlQmVVOzbQ_000250.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing table tennis."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2uHcWdkV4W4_000120.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0TSOlaZXXM8_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from subway, metro, underground."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2uRe-e8RVEM_000190.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4ZJrKmDGdA4_000140.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from dog bow-wow."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2vYkvwD-fkc_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/Ah-IBgwj2lg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing harmonica."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2vcA_vgImHQ_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2vcA_vgImHQ_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from lawn mowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2vycuIKwnnA_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2vycuIKwnnA_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from baltimore oriole calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2wGvpfAkly0_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2wGvpfAkly0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as child singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2wsp3s4hIKs_000008.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0TSOlaZXXM8_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from francolin calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2xBIHi3oqy4_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2xBIHi3oqy4_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as female speech, woman speaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2xQuWif8axE_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-XwtCzUaN1I_000160.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing electric guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2zec0wuZgLE_000500.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2zec0wuZgLE_000500.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from opening or closing drawers."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3-PFuDkTM48_000080.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3-PFuDkTM48_000080.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing accordion."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3-qCp_EP-5Q_000234.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3-qCp_EP-5Q_000234.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from parrot talking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/30jlal45IKo_000097.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GXRooshOGuc_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from cat hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/31GOxPXDNkk_000068.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3uLmjElob2Q_000004.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing erhu."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/31O2j4aAgYU_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AMxnCzC33lo_000070.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing bass guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/31uVf6wvCTA_000208.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/15Mw2jyyHk0_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as people battle cry."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/36ToDxW_hns_000090.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/36ToDxW_hns_000090.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing mandolin."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/387WVkfK-5w_000057.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/387WVkfK-5w_000057.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by cow lowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/38F6eeIR-s0_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4cgDBRTdmq0_000221.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/39jhSrtRm70_000079.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-XwtCzUaN1I_000160.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing timbales."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/39rXWSXGOyo_000080.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7Gua0-UrKIw_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is cat purring."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3AcKnYk_cLY_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mDPQ_CPopw_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is motorboat, speedboat acceleration."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3AwevAQzwcs_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GW4XJ_zY-SQ_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as cat meowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3AzacBprTzU_000060.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3AzacBprTzU_000060.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by dog barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3ClbaJYWVO4_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3ClbaJYWVO4_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing electric guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3DK5YAQAVlI_000080.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2f7NnTVQzR0_000150.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from people nose blowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3DTZsjKMOyA_000289.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3DTZsjKMOyA_000289.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing steel guitar, slide guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3F9Qszr4j1Q_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/7VMWi8xm0NM_000020.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing flute."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3FE8b0QVvsM_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1MTKUN3uFrs_000440.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing harpsichord."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3FNTvu5iROw_000076.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3FNTvu5iROw_000076.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as wood thrush calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3GtKbvwaycY_000025.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/14tRPoQA4q0_000004.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing bassoon."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3GtKbvwaycY_000037.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/28ehZa0fzmo_000028.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing bassoon."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3GtvLj8K1F4_000140.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2j4dsRMuj4Y_000079.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing harmonica."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3H3261x-QgI_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3H3261x-QgI_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is goat bleating."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3HgbEKrbA9s_000054.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3HgbEKrbA9s_000054.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing double bass."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3IwOig7sw6c_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-D64b_8YJK4_000046.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as chainsawing trees."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3JYjNHFJfO4_000363.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/G5LqrMcq1QE_000280.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is baby laughter."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3KQxT20mY-k_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GUJCLsTnqTA_000192.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as goat bleating."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3KbSLoAAzy0_000069.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3KbSLoAAzy0_000069.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing guiro."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LLKsRSkAbE_000110.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LLKsRSkAbE_000110.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as subway, metro, underground."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LNnX2Vq0XM_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LNnX2Vq0XM_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as snake hissing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LiCT1BSrgQ_000150.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LiCT1BSrgQ_000150.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LtXT2KVI-c_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2sKmbnOi5S0_000006.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by cat meowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LxtO_vMokk_000010.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LxtO_vMokk_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by driving motorcycle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3MUeg3nD2OU_000120.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3MUeg3nD2OU_000120.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing acoustic guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3NGZcpAZcl0_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4dVhbo5rq7w_000212.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from pigeon, dove cooing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3NvKtRbbOYU_000310.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2Vu6HtlYio4_000219.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as driving snowmobile."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3O7CzEL0pXA_000421.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3O7CzEL0pXA_000421.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from people marching."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3O7CzEL0pXA_000496.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3LiCT1BSrgQ_000150.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from people marching."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3OlQmVVOzbQ_000250.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3OlQmVVOzbQ_000250.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is rowboat, canoe, kayak rowing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3PD-JrOG1WA_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3PD-JrOG1WA_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as people hiccup."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3Pfu7BPWvL8_000036.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3Pfu7BPWvL8_000036.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as children shouting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3QEd9nR_p7w_000018.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3QEd9nR_p7w_000018.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing washboard."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3RSmQOcI9JI_000105.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/ALdjQ9kbwd8_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by slot machine."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3RfBwL7tXuk_000188.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3RfBwL7tXuk_000188.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from popping popcorn."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3RfrTU1p5SA_000500.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GdFlyobMoAE_000009.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by people crowd."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3S-yoXKDvE4_000025.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3S-yoXKDvE4_000025.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is barn swallow calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3S3cX2QxOoA_000000.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/D5Zi0Mfo0Ts_000048.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing clarinet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3SyOlb_hSjg_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3SyOlb_hSjg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing electric guitar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3T3hm1hHCto_000228.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DFf71XYDX3g_000016.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by snake rattling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3T7FYQRx0YM_000033.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/D5Zi0Mfo0Ts_000048.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as donkey, ass braying."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3T8XhCkaA8M_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3xh2kScw64U_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as motorboat, speedboat acceleration."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3UCK4XCrvoc_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3UCK4XCrvoc_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is people babbling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3VfSV1vJydw_000191.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/AOwwp601QZw_000130.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing mandolin."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3Vpt-gR-8Lk_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3Vpt-gR-8Lk_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as cat purring."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3X9dtfeV1mQ_000040.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3X9dtfeV1mQ_000040.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by playing saxophone."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3XhpZyBrIVk_000018.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3PD-JrOG1WA_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as woodpecker pecking tree."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3YI7HlxdMxQ_000208.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DDhKU_VH0Js_000228.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is barn swallow calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3c6dQju-FYU_000021.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4WH0ZxvF9Nw_000003.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by civil defense siren."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3cThgRIaqgU_000016.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0NLXz4JgvcQ_000096.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing trumpet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3eaA8bsLJBA_000042.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3eaA8bsLJBA_000042.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as playing cornet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3g3cr1rw3HU_000105.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3g3cr1rw3HU_000105.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from train whistling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3gR0QBgrzYQ_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/0N0C0Wbe6AI_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is vacuum cleaner cleaning floors."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3gjFMkV2ucY_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3gjFMkV2ucY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing accordion."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3h1-zP8M-yU_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GXRooshOGuc_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing shofar."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3k8zGf8Btfo_000250.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/48zE-hRAYEA_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing drum kit."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mCWARDySc8_000059.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/20qZLse0acs_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by playing violin, fiddle."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mDPQ_CPopw_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mDPQ_CPopw_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is engine accelerating, revving, vroom."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mENYq_Ta3s_000012.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mENYq_Ta3s_000012.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by people booing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mkiTkMVemM_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mkiTkMVemM_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as motorboat, speedboat acceleration."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3mtdpXbogzk_000180.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4O-dYCrVKLY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from motorboat, speedboat acceleration."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3njuN-F2Ecs_000332.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/1WLMVX_Y2Pk_000018.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from playing washboard."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3oGWyWYbFyk_000006.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3oGWyWYbFyk_000006.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is elk bugling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3oaky81Equ8_000016.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3oaky81Equ8_000016.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is electric grinder grinding."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3oaky81Equ8_000045.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-_QPd-VskKY_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is electric grinder grinding."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3otUlQ4wvLY_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/Ah-IBgwj2lg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from male singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3p3A4QDXw-g_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/D3BJuOwltoI_000010.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by male singing."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3ptxRyVuU0w_000520.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3ptxRyVuU0w_000520.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is machine gun shooting."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3qEb6Y-D7Mw_000050.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3qEb6Y-D7Mw_000050.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as child speech, kid speaking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3qesirWAGt4_000020.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/DFf71XYDX3g_000016.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by dog barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3sQrJVyxju0_000065.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3xh2kScw64U_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing mandolin."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3sYnYraOuIo_000510.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/26dqZUTv5os_000175.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is fireworks banging."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3sz7nQTcwcE_000009.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/A_oaLt-n4fQ_000220.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "From this picture, it can be found that the given sound is produced by dog growling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3t6pqBKV0kY_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/2VptepDtAZ4_000071.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is motorboat, speedboat acceleration."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3twm2ErD0kM_000154.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GMKY1SWCmIQ_000055.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is playing cornet."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3uLmjElob2Q_000004.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3uLmjElob2Q_000004.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from cuckoo bird calling."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3uuyQ4O0L68_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3uuyQ4O0L68_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing french horn."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3vDDWN5GKAA_000139.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3vDDWN5GKAA_000139.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is playing snare drum."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3vLc6D64xcs_000330.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3vLc6D64xcs_000330.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of this sound in this scene is using sewing machines."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3vpG1PgFF34_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/ATAL-_Dblvg_000000.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of the audio in this picture can be identified as playing harpsichord."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3xh2kScw64U_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/16CvcIXIjzQ_000332.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from reversing beeps."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3ypSlMWAZeo_000239.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/3ypSlMWAZeo_000239.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "The source of the audio in this picture can be identified as people marching."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4-DcTjFR4qw_000100.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4-DcTjFR4qw_000100.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "From this picture, it can be found that the given sound is produced by car passing by."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4-SlE4qtKvw_000222.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/GUJCLsTnqTA_000192.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "The source of this sound in this scene is fox barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4-SlE4qtKvw_000239.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/FzzboZDObbs_000041.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from fox barking."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/40s88hEcn5I_000170.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/-K-ccLMFE5M_000259.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from engine accelerating, revving, vroom."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/40sAH2ZB0Pg_000030.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/40sAH2ZB0Pg_000030.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "Yes",
        "text": "In this image, the given audio might originates from playing banjo."
    },
    {
        "image": [
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/42vB40Fkdis_000076.mp4",
            "Benchmark/VGGSound/scratch/shared/beegfs/hchen/train_data/VGGSound_final/video/4UCZF_UnG10_000027.wav"
        ],
        "question": "Is the audio compatible with the video?",
        "answer": "No",
        "text": "In this image, the given audio might originates from airplane flyby."
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000429598.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3ne5fg0ysur5j-37XITHEISW9IXJ9PI2VG8M9H6NQRCV_429598_356764.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "kitchen view with brick framework around the sink and by the oven"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000042070.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/me1wlqd3fuivx-3XCC1ODXDLBMRKJ8NKWBSRYYEHMRQT_42070_422240.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "blue white and green passenger bus parked at a stop"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000044652.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1398enww3zl0i-3TVRFO09GKFVXJP74EU70NLS5GGXLE_44652_31739.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "an airplane is flying high in the cloudy sky"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000315257.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m34slymvw3r4hz-392CY0QWG1R29GTRZ0VPBWW2V8HI4O_315257_714419.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a blue black and white bird a bowl of birdseed"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000191845.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mtpgo59kyn0l4-3T111IHZ5EQGZL2M6ILP6W894R7R9G_191845_392292.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a group of people crossing a street with umbrellas"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000377588.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/15/m2pzxt03kotnah-3FDJT1UU748BBUNFD8ZBT4HTNBS5KO_409358_424680.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a bathroom with white and black tile a sink and toilet"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000546976.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m2852dxyq6e2uj-3Z2R0DQ0JHEGQXCIZ3MAX21CJ452E0_563882_220475.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a woman sitting in a seat holding a cell phone"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000402346.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/ms7s6lss73vb7-39ZSFO5CA8WXLP7CWO77019W4CTJU8_402346_184778.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a counter with a flower tomatoes in a bowl and silver canister"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000135890.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/23/ms7s6lss73vb7-3JCG6DTRV3QMFJ48A57BN1OTFMOQQA_515077_404436.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a person standing in a room looking at the tv screen"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000494759.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1bqqrrs1kx103-3IOEN3P9S7J5OX17AH3YGVZU7PH61R_494759_59092.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "two people on a beach flying a kite"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000420069.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1cojelv3lxozk-3Y5140Z9DXGOY9FAXKNDEU7XL5ZIP2_491213_353356.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a motorcycle sitting in front of a classic ford mustang car"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000466567.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m11nxcwhi1951x-3WLEIWSYHOHSA73KP4UGI5YP8P5H2A_117914_640714.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a bathroom with a toilet window and tub in it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000257478.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/9/m1ctq32sndw7z8-3S4AW7T80BI4ZH1XK3S23COUCLA4L5_401244_47921.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a person playing frisbee on a field in sport wear"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000033638.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m135oskcs2l9nh-32ZKVD547FN74CWHTCYPF40JJTUB3Z_33638_251421.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a woman cooking on an old fashioned stove"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000097022.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m22ctgw7k48f5z-337RC3OW0523TUKC4ZTPC2O7PSILVJ_97022_222459.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "some wood cabinets in a large home kitchen"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000529568.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1yqqk4y9sb8nj-3RJSC4XJ10U9Y4GB9FPTAK2K6MU505_36494_641154.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a bunch of people gathered inside of a building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000574315.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2pzxt03kotnah-3PPTZCWALQKVTB5ZX1QOEU0XN9DQZF_356505_222729.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a woman in a bikini riding a wave on a surfboard"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000290293.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1t6bhidokz0q4-3KAKFY4PGU2HRKANZMQ61R4WE3AI3F_290293_701127.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a long subway with people in it is lit up"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000000785.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m12xyeia1z554i-37C0GNLMHF3ZGSHJQANTHX03W55D6B_785_502265.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a person on skis makes her way through the snow"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000225670.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m142hce9axjif8-3XIQGXAUMC8WIY050KTXHI3R6TO7XI_225670_585520.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a dog leaping into the air to catch a frisbee"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000189436.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m13ipmjxdzuu3z-39JEC7537U1AQQQ6RM0Q9JYKVUNVCI_189436_769719.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a carved bear that has a ribbon around the neck"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000360393.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2dakdkjld9ctr-37FMASSAYCRMUFE0EF5CJA282BRBIZ_360393_614600.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a pizza with meat and red peppers and parmesan cheese"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000321118.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2hzy7r5dftwip-3WSELTNVR327KJPJ2D4K26KBXY8TA3_421923_548275.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a room witb a statue bookshelves books and a vase with flowers in it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000528578.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3liyhw6szfv8-3TE3O8573085Z1HNL96AW90VU0X2RP_528578_214152.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "the big ben clock tower towering over the city of london"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000233238.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m105cb15dj3kya-3RXCAC0YIRPPWQAYLFLZ7WZRIPPG8L_233238_75429.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a group of people gathered around a table filled with food"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000077396.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/15/m2ppbejt2j96dr-3ZSY5X72NXBJ686S87473M3YW7COR5_54628_97420.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a wedding cake with flowers in a banquet hall"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000474167.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mjrmop9131tql-36H9ULYP62U827AM1ZC8E18TXKDFJX_474167_494256.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a woven basket with slices of pizza in it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000293625.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2ruyqzv9gyz7y-3MX2NQ3YC9UHVUMMCRE0JYPVJPMX5U_183500_454512.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "an old biplane that is flying low for the crowd"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000154004.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2t92w2776m1uo-3O6CYIULED1JR97B3NQMPR34697WUN_154004_651325.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a group of people on a beach looking towards the water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000078170.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mid2t6rds5niw-3WRFBPLXRAO95LLEW23Y6Z0DZHON3D_78170_143157.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "passenger train in the night hours with no smoking sign and two women"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000552902.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m304gvmq051v89-3YHH42UU5BFN0TJW1250R9YHVW1L0N_552902_697777.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a zebra grazes in a field adjacent to trees"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000526392.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/11/m20jvpl9hcbmtw-32UTUBMZ7GWRGLY0DM4VYZS2D5WVBC_176799_735608.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a couple of people skateboarding in a graffiti filled area"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000006471.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mdj93q7nlh6eq-37Z929RLG98BKFBDJDQBN6EHK38TSR_6471_827020.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a baseball player holds his bat and waits for the pitch"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000297562.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mb5uog25tvpfx-3FDJT1UU748BBUNFD8ZBT4HTJI1K5M_297562_20998.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a little baby is getting a haircut in a pink chair"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000334719.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1398enww3zl0i-3MH9DQ757WCNUNHBOIM43OHGFHBUGB_334719_598831.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a family are playing frisbee on the beach"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000315257.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2djbtaq9h9634-3YGXWBAF70HBWD7R7QZ30TY7INW4CB_315257_710900.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a bird with outstretched blue wings is sitting on some bird feeder"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000562121.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2dxb55ke6ocui-32ZKVD547FN74CWHTCYPF40JMPHB3H_536947_247820.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "an area outside a dwelling that has been flooded with standing water and trash piled up"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000112997.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2hd26rt6ldyir-3GGAI1SQEVYR03WXDUZZMNF74XGCMF_112997_587132.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a man that is holding some kind of radio"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000464144.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m28dd7t7nym8ui-308XBLVESI4ZNEHJ42ZLX23ZSZ0RBW_251140_500413.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a manual or book about tenspeed bicycles"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000005586.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3a1u9ymfa3llz-3R5F3LQFV2KS8ZSJIOYKPP5X8F4OZS_5586_783234.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a man standing on a tennis court holding a racquet"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000229858.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1bqqrrs1kx103-36V4Q8R5ZK0VU46GIC0U6S4O54SQMQ_229858_195057.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a deer comes out of the bushes for its child"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000128476.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/6/m1ui8b2a4ow670-3HHRAGRYX85D28A6EQ4LC134Q0L9OC_176634_611670.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a zebra has its nose pressed to the ground"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000189436.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/21/m3sv43zxwkl7a-3IXQG4FA2TYTJELXALZZ6H35JJMB9U_287667_194427.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a wooden cutting board a knife some carrots and onions"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000234366.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3aooghuphurfc-340UGXU9DY16CK76DDLQ3PKQY34VUG_234366_248968.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a clock tower is shown in the middle of a display"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000483531.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m19aricqa6ak6i-3YWRV122CSZ818BT5KAGBB0IOH7U83_483531_616222.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "the room is equipped with two seperate twin beds"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000049259.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2v84mgw1shfqm-34Z02EIMISDBJ6R1AB7RD08YLK30T3_170278_3996.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "dog smiles while sleeping in a bed room"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000562197.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m304gvmq051v89-3OXV7EAXLEQ1Y0FMXLHH4OER8ZR36V_416758_697688.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a herd of black and white cattle standing on a lush green hillside"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000454067.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mj74ga4sby01r-39OWYR0EPKRYXW5LNZDBOONH7B3FYF_454067_439482.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a person with a remote in a room"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000007278.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1z35hh9cgdwtj-3A7Y0R2P2OOP2T171TIUK8801QXJXI_7278_687469.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "there is a male surfer that is riding in the wave"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000527695.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/23/mbr1qmlf104xc-392CY0QWG1R29GTRZ0VPBWW2U5SI4S_249025_20431.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "two orange roses in a small vase with water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000458663.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2tjyth1h2sj4q-33L7PJKHCGYT1VW45FK66FEG7778T7_458663_814499.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "modern kitchen with dinning area and all wood flooring"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000266981.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1ctq32sndw7z8-39U1BHVTDLR70YQYTYPAFY65RN0T3P_266981_603667.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "people sit and walk around a large hotel parking lot"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000364322.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m15gxvhpg896x2-3AZHRG4CU4K6KPT7VW10VN40VIW30T_38070_553816.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "an overhead shots of a toilet in a public restroom"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000437205.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m2jo5buo9nfp13-3GA6AFUKOOOHVPZ3T48L1F3HD6Y3H5_78266_672514.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "kitchen utensils and appliances have been left unattended"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000325114.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2saqi3zjrms1y-3F0BG9B9MPNLI3QF5GFZ0WA089EY7V_325114_630704.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a little red tabke with a plate on it in the bathroom"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000105923.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m12xyeia1z554i-39LOEL67OS5O2EYAQ0EFO2AIEG838G_105923_746571.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a stuffed elephant standing in a museum window"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000366178.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1yqqk4y9sb8nj-3N4BPTXIO8SSW9DUE97J1G99M9FKUD_545129_361834.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "three zebras huddling together in a very dry area"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000273760.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m142hce9axjif8-308Q0PEVB8D36MZ3GVRGS47IED09I3_318114_108578.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a white plate with a slice of cake and an orange wedge"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000068833.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1cqc64q22fzgl-3O7L7BFSHEPK1IQKO672KI8T868EI5_68833_352767.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a counter and some chairs in a small room"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000127987.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m10sise0wej423-39ASUFLU6X7HRDFFNS73NSV8SGQXES_508730_813261.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "two babies sitting on their potties in the bathroom"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000024027.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2e29njkb7ltr3-37U1UTWH9VMGLGJC1QFB60DYKBHR8Z_24027_782138.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a large kite that is being flown in the sky"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000054967.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2h342js55jj2j-3A1PQ49WVHHLLMLO60U2DMOKGMLH1T_54967_671640.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a city road with cars truck and traffic lights"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000485480.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2fy31gkekkszg-3B2X28YI3WF61VJZCEWNM7C3S8BB6V_465822_528161.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "several people in a room having a meeting while someone is recording it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000060347.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m29l7oow4wnodg-3ZSANO2JCF711ATCOTLM62ZGTICSF6_436738_382256.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "an old photo of cars in a city street"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000007278.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m10sise0wej423-3A7Y0R2P2OOP2T171TIUK880XHVXJ8_7278_690046.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man in a wet suit riding a wave on a surf board"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000163640.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12e12lh0tbjh9-3PW9OPU9PQKH6WI6NFRVQIIIU0D122_163640_311802.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man has stick which is holding some bread between his fingers"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000191845.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m2m6x0t4hovpdw-3300DTYQT2HXIGE31ECBTKZFVFGQE5_438876_697417.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a tennis player eyes the ball and prepares to hit it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000378116.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1398enww3zl0i-30MVJZJNHMDZ1XJ9YDCG45MIUOF9J0_378116_466173.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a surfer wearing a wetsuit is riding a wave"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000555012.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1am4ui422rbwp-3NVC2EB65QZ3HKPS34C9NFYK7YJY33_99054_67558.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "an airplane is at an airport with yellow hoses by it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000288584.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m3bviogz8ozti1-3ZPBJO59KP1FDH10MTEXEOPT48WHD4_376478_328769.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a single white rose in a glass vase"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000267537.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3TMFV4NEP8ELTC0Y0KPHTZLFB1P8WX_391648_319032.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a tower with a clock on it during the night"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000539883.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1ivvyi8h0o3rc-3UOUJI6MTDEYG9C1DSM8RAHBI6FUX9_539883_320428.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "the television screen ifs on the roof of the building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000119365.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m29prsq6csammx-33CID57104TJHLITKPNJT7WOYLBL3Y_460927_476895.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a majestic bear looks out across a grass plain"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000498709.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1aegpn78bbwvh-3FPRZHYEPY7MDQUNYTPPGGUQM2EV3R_498709_594760.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a black and green parked motorcycle some cars and buildings"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000119088.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1ctq32sndw7z8-3NG53N1RLVJV933XOYBM15DJJH8P8W_119088_293.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "the men are up to their knees in the water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000464522.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2me2l9b0hh2zb-3I7DHKZYGN00V8V1CX8EDRPOFB55F8_464522_151751.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a little dog looking at another dog in the yard"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000493613.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3QL2OFSM96IXILHXPVEZZ2479EDNCV_493613_578596.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "the upturned skateboard is below a floating pair of shoes"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000090108.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mwfcp9k44s495-3IQ1VMJRYTKO04G547Z3B6JZ00PA9J_90108_676529.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a white bathroom sink sitting under a bathroom window"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000297562.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m15yw127w9r2pl-3S4AW7T80BI4ZH1XK3S23COU9O4L4J_305695_375735.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "zebras are standing in a fenced in area"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000414340.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m19cox11w56s76-3IHR8NYAM71UQ2GVCVL9KTXEGMYP4S_414340_190850.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a tall clock tower with trees all around"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000320642.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2me2l9b0hh2zb-3JMSRU9HQIUPNOAKND6LZ7YNC54EVS_320642_615325.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "two women standing next to each other with one holding video game controllers"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000405972.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12e12lh0tbjh9-3B2X28YI3WF61VJZCEWNM7C3WC96B0_405972_777365.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "an elephant leads a baby elephant towards a door"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000368982.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m2mb6kxtkcv0c6-32AT8R96GL9QKSQ2JIGLS20J3MNUS1_489764_77720.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a woman holding a game controller with a man looking on"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000383289.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m29trjokxf9t8s-33LK57MYLT576RKCP5FT9UFY2TSZSW_495448_151758.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a wedding cake is shown with pink petals"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000328030.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2dt2jj9kkp38l-3A1COHJ8NJV3WM595L3HSWBSXKK8HK_115245_233493.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "luggage and coats lined up at portable tables"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000512776.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m13ipmjxdzuu3z-3WS1NTTKEYC10190FR22WWTXYB20F6_512776_518442.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a catcher crouched behind a baseball player at bat"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000054967.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1yqqk4y9sb8nj-3FE2ERCCZX8YUVQDVF0IYR3JYY2POD_186624_707622.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a train parked inside of a train station next to a person"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000441586.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m12xyeia1z554i-3B1NLC6UGZWA2I9F7WNATIUH4FVGP4_441586_112654.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man on a beech vehicle making his way thru the sand"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000506279.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2m6rt0r7xp9zp-3K3R2QNK8B38FDU3ACGUB7VTB2D9US_506279_161279.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "stemmed glass of amber liquid on a counter"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000432085.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2saqi3zjrms1y-39L1G8WVWQR6REEPRF509SNAWH731B_134096_118708.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a cat drinking water from a bathroom faucet"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000410878.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1cqc64q22fzgl-39O5D9O87TSQEHON7CUKQO614AHC3P_563604_687871.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a electric sign sits next to a park"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000143931.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2pb95m3dbcg9m-3B2X28YI3WF61VJZCEWNM7C3TI9B6E_473219_687166.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a man and woman look into each others eyes while getting married"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000398905.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m29prsq6csammx-37TRT2X24QRIPQY6WXQCVBH8IOIBJC_541055_604434.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "the skiers are happy to pose for the picture"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000370375.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3xm5b3z3wu8g7-3IUZPWIU1O75OD2Z99MKJ4U67COWKW_370375_176138.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a young child in a khaki pants and a hitting helmet holding a small baseball bat while standing in the grass"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000186938.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m28dv2tzcfv0jc-3WOKGM4L71GV6EMTEULSJQ2BN020OA_150417_500217.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "an older woman sitting at a table with a cake"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000227491.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1yqqk4y9sb8nj-32KTQ2V7RDFP25PU1AP8KXEZUJAM9F_227491_346377.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a woman is sitting on a bench and is eating something"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000380711.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1yqqk4y9sb8nj-3RUIQRXJBBO0XP8IQVRJPTQ5U93LLD_380711_301091.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "on a gray day a surfer carrying a white board walks on a beach"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000073533.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2zco7272xa5e0-33C7UALJVLYSSBKV3W0W09VLGN881P_73533_781955.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a few boys read comic books together outside"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000516601.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/6/m1vve3uwxjdzd9-3X87C8JFV6BYSZJL29WGI13U0JWSQ9_458702_15846.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a view of a street with some trees around it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000042070.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/15/m2ozbwnlhocpe7-3NL0RFNU0FNTFBJFZ3G1OCBFN8H4KO_234413_730212.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a small white toilet with a toilet paper dispenser on the wall"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000508730.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m25zm9k3s80nh1-358010RM5ETYTO11IIWDNWJUTHHVXX_508730_804324.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "two naked babies on training toilets in a bathroom"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000041633.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2p98h3lkipyva-3RYC5T2D73T1R8ITN7HMSM8O57DRPJ_41633_5987.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "an old model red chevrolet truck is on the pavement"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000142092.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m33xz0p9h20unm-3J2UYBXQQLC5H8GPEYZY2QGK1DV06X_235241_627628.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "woman and man playing with a disk toy outside on the beach"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000088951.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2dqnnyhnrnirp-3XM0HYN6NKZCI4DO11XMTF95UVREPR_88951_186776.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a man and his dog enjoy an afternoon in the park"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000061268.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m12e12lh0tbjh9-3FTF2T8WLRIL7HJ85CVVTVGQW6P9W4_286994_446182.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "one elephant standing away from the rest of the herd drinking water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000031817.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1398enww3zl0i-3DEL4X4EL6LFXIW6NTJXUVNOEVMYX9_31817_185222.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a person on a motor scooter with a helmet on"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000303908.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1qe0l6hma6k1s-3WYP994K17R2E3CAM6A7UIA4EDKY6H_339870_362952.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "some stoplights that have every light lit up"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000000785.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1qe0l6hma6k1s-3R6P78PK7KB8UA2MODWUYSVES0ZGTR_785_501005.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a woman in a red jacket skiing down a slope"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000140583.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m304gvmq051v89-317HQ483I7SOVO3XH5ALA522MLZNI3_140583_383237.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a group of donkeys on the side of the road"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000355677.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m15by8wmv40jh7-3DI28L7YXAE91CUMGZMWSJR7RY61EX_355677_163349.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "lifeguard station on a beach overlooking a swimming area"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000125257.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m29prsq6csammx-3DIP6YHAPCSRCCE7FKVB7TEF59AE82_556158_99147.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "man cross country skiing on slight down hill slope"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000301563.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m15yw127w9r2pl-3DBQWDE4Y6YCJ08IALRRLWYXEVDN52_23751_180035.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a statue of a boy and a girl flying a kite in front of a building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000380711.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m253dwe1ylfsrg-3D3VGR7TA0FUKJD6P9KFFJ5N1DER35_380711_301775.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man walking near a shoreline with a surf board under his arm"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000235064.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m19rw7skrmc7s9-3DY46V3X3PI8TB2C6LV5RFYBQDY55O_235064_315936.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a bear walking up on top of a wall"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000065798.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1al5uisyf5izd-3KV0LJBBH2LV65LGVRPUBIET1QAMR3_571718_144478.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a man with graying hair looks down at a stand full of yellow bananas"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000211674.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/my36w1vkw4eo1-3T3IWE1XG6NZ7ZW0R916S6ZGYG1TQE_211674_407185.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a double decker bus driving down a street"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000454067.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3heg6dpzl1r7y-34HJIJKLP5W7VMDRY7U8QLMW3HB4VJ_454067_438672.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "someone in green is playing a golf game for the wii"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000185250.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3OVR4I9USPJFQEHAC80YKYNOMLHQ4O_185250_506162.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a man watches as a dog prepares to jump for a frisbee"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000257478.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1yqqk4y9sb8nj-3BF51CHDTVALEX06AHQ2ZH5ABBKH0T_269196_763968.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a herd of sheep gather near a fence on a snow covered ground"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000468124.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/21/m3wmv085v9ad0l-3YDGXNSEOZU9KAHEQEJYY72OP3Z848_485130_457244.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "there is a birthday cake that looks like a boxing ring"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000407868.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m15gxvhpg896x2-35BLDD71I6XNYJ1GJ0KYKHZ51UNZVW_407868_292466.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "the man in the park is flying his kite"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000152465.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2xvfotbdlxse7-3DZQRBDBSLF6LYDJ4YKK5TAJQ98S3W_152465_771557.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a fire hydrant with a tire underneath it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000328430.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1t6bhidokz0q4-34BBWHLWHABEIICB96186OFTEY1IWO_474854_68074.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a man eating a banana and smiling at the camera"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000410878.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m253dwe1ylfsrg-3VHP9MDGRNKLUT4GR7VMRXVY7PKCFA_410878_271860.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a motorcycle parked on a gravel road near a park"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000529568.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1l56gkpw5a6ik-3D8YOU6S9EKLXUS5C5DIRX4RI6I6UY_529568_329184.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a nice kitchen has a large crystal chandelier"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000349184.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1t6bhidokz0q4-39OWYR0EPKRYXW5LNZDBOONH7SGYF9_579635_553035.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a man on a surfboard is riding a large wave"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000463849.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2sbdmlycpdbvb-3OONKJ5DKCJWQ1P3CAIH37YUDFRBOK_422886_579959.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a man eating a hotdog that has a lot of cheese"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000143961.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3m455rbyyfudl-3MMN5BL1WZ43N30EK1OYTKBDY2U3MZ_143961_709737.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "people are gathered together sitting for an event"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000101420.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1f19x2qkfpstm-3VA45EW49NNVD37BADPSXVLLVTB1OM_101420_788595.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a cat that is looking out the window"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000407868.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2e29njkb7ltr3-3A9AA95ATWMCMLKVQ04JPL3SHD0P5V_493864_208050.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a man standing on a wet beach holding a blue board"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000143961.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m286i816oyulx9-3PJ71Z61R42S6G358O6A05QW0T991Y_97994_140824.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "three laptop computers and a desktop computer sit next to each other"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000080153.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/ms5byh0ze3m44-388U7OUMF71Y3RE96M2WDL7GXRMR0H_80153_480517.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a person riding skis on top of a snow covered slope near a dog"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000375078.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3mksvep10nwjq-37C0GNLMHF3ZGSHJQANTHX03ZQQD65_375078_19612.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a black and whit vase sitting on a small table"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000128748.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1rksdpxuw4312-31EUONYN2V3BZFN9HR9H4JS1JYLOV2_128748_367745.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a group of three men standing around a baseball field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000468124.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m3flqyyjmcw5ci-3XIQGXAUMC8WIY050KTXHI3RAI17XD_163258_491212.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a white toilet sitting in a bathroom next to a wall"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000549136.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1bqqrrs1kx103-3WS1NTTKEYC10190FR22WWTXY7DF0O_549136_123683.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a stuffed animal sits atop a barbed wire fence"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000233238.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1yv543m6k6ndb-3A9AA95ATWMCMLKVQ04JPL3SDHJP5I_233238_83463.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a long table with many containers of food on it and many people around it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000154358.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/3/m15gxvhpg896x2-3PJ71Z61R42S6G358O6A05QW026195_352684_382820.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a man standing in a public area near some other people"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000559348.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m27cxn5216p42e-3ND9UOO81K2G8JKHU9YLY47LWE8LW7_559348_615698.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a baseball game in progress with the batter up to plate"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000288042.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3PQMUDRV7R6WHNS6KPXT5X6LXIAIIC_491090_169865.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a man sitting on one of a group of motorcycles"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000328030.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2udz7mhyh5bbx-38F71OA9GTWY3FG74WP0UT0A05ZFM9_328030_659162.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a stuffed animal with a hockey stick"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000153011.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1cojelv3lxozk-3EJJQNKU9R59ERK54U9HJD6XNG5HRA_426376_539653.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a man riding a snowboard down the side of a snow covered slope"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000217400.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m11eivx3y0ffxm-36W0OB37HWEIGI6WEB3U5G8WWCZZHQ_217400_199475.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a silver and green train pulling into a train station"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000001818.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m135oskcs2l9nh-3K5TEWLKGVB13TC6JWWCKMWF536VIO_44699_485275.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a herd of sheep standing on top of a hill"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000065798.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m15yw127w9r2pl-3Z7VU45IPYH7C7LICM09DTSPFZA1ZO_557172_714629.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a contemporary bathroom that has a very large mirror"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000394611.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m12xyeia1z554i-39KFRKBFINVS39IEM2W1BIWVZAIOYU_394611_5720.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "two tall giraffes graze on bushes in an open field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000080153.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1cqc64q22fzgl-3ZDAD0O1T1DJGWXCDNWYMEV9ZTYXTD_240023_37173.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a tennis player using his shirt to dry his face during a tournament game"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000319534.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3o81ftkkenshp-34Z02EIMISDBJ6R1AB7RD08YLC9T0M_319534_120023.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a young man standing in the doorway of a bus"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000349184.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/20/m3peg8dv9nioyo-3B2X28YI3WF61VJZCEWNM7C3WH3B69_378605_296590.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a donut with a cup of coffee and an ornate napkin holder"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000232649.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m1580yf585y44s-378XPAWRUCDHB598815GOZI2M16AIL_82846_725189.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a tram like bus in a parking lot"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000364884.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2t92w2776m1uo-3VHHR074H3H1I4JGM1Q96O8408E7L9_153299_542841.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "two giraffes one grown and one baby standing next to each other"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000173302.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/6/m1vve3uwxjdzd9-3EICBYG644W1ZVQCAXM76VNQ0LHCJ7_540502_484336.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a kitchen with a refrigerator a counter and a sink"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000407868.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2r8zl9k35hxtr-3JBT3HLQF82ATZ4KXOBZEPANICFPZ9_407868_291377.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "young people are flying kites at a park"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000086755.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1z6dx4taf02w4-3WZ36BJEV3GC3TUB8R4GXH4HCPBBTD_86755_546540.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "two snow skiers are coming down a hill"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000143931.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/11/m23tl18wfow2pm-3D8YOU6S9EKLXUS5C5DIRX4RLX7U6W_58636_331639.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "street signs on the corner of gladys and detroit"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000315257.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/12/m245obzpgptgkj-3XLBSAQ9Z4CLNT0K12HIE3J2IQQ7ZH_539962_149895.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "there are two people in the middle of the pool"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000044699.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2ppbejt2j96dr-3XLBSAQ9Z4CLNT0K12HIE3J2EMA7ZP_44699_478666.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a group of animals grazing on a lush green field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000420840.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/11/m201j4v184ku9w-3KV0LJBBH2LV65LGVRPUBIET0BSRMV_87476_351424.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a young man riding a skateboard with an older man"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000145020.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1398enww3zl0i-34T446B1C0EUXPPQOPC1OQ8U10FC0G_145020_530254.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a person who is in a duck costume"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000094614.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mbo2c1gj7m620-30JNVC0OR9K92Q55RFKHENWSOYDQHX_94614_151632.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man skiing down a hill with a tree behind him"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000400922.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2pzxt03kotnah-3FUI0JHJPXY21HS4EJJJXL27X9I33J_400922_144798.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a photograph of key bank with a clock under the sign"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000551822.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m142hce9axjif8-3570Y55XZPJ4BW1GYJJT69NT72ZYGG_493905_282685.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a tennis player is in mid air with his racquet"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000311295.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2r9w217ua3xjw-3L0KT67Y8EG711A7TJDAVN6WWFTYS5_311295_250631.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a group of zebras in a field by some trees"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000022705.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1d1btuiby3nyb-3CPLWGV3MOZVKNAUDB48XC3Z3YR9NR_22705_546454.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a woman standing in front of a refrigerator freezer in a black dress"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000039914.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/3/m19068sc13ac4v-33TIN5LC04APWME8KDDJES1673X9YP_462031_773286.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a man holding a bat on top of a field under a light"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000068409.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2ozbwnlhocpe7-3XM0HYN6NKZCI4DO11XMTF95UENPE0_68409_657550.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "school boys sit cross legged in front of a chalkboard sign in a vintage black and white photo"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000015335.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2v84mgw1shfqm-3ATPCQ38J8A315ODCJRJANCQAS1YAQ_15335_509731.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a young child lady and man sitting in a booth at a table"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000210394.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2np2ke9gkrc9e-39ASUFLU6X7HRDFFNS73NSV8SA1EX8_210394_565687.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "an old bus on the road and people boarding it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000185802.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/23/mwfcp9k44s495-3I2PTA7R3TU04G6DX0NERFCKJZSKQD_350388_72318.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a cream based soup in a while bowl with a chopped green vegetable on top"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000207844.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3ITXP059PWJH6CF87JC7L0R8LM2SJB_207844_243209.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "people are sitting in lounge chairs on the beach"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000575243.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m162oa8w267rg8-3RWE2M8QWHADOTMHDFLF4126XQL0NJ_575243_696481.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a black and white picture of a persona nd a umbrella"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000391648.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m142hce9axjif8-3FDJT1UU748BBUNFD8ZBT4HTKL4K5W_453708_795035.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a person carrying a crate on their shoulder that is full of bananas"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000288584.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mwyct2ysfgia9-3KV0LJBBH2LV65LGVRPUBIET17RMRI_288584_47924.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "somebody wearing a giraffe costume greets people at a park"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000255917.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1yv543m6k6ndb-3GA6AFUKOOOHVPZ3T48L1F3HDVWH3V_255917_224860.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "traffic stopped at an intersection waiting for the light to turn green"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000061268.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2xlbxnod55qat-3FK0YFF9PZG6PZW6I3VVZBM9DPNVVA_61268_681676.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a big long train that has some storage containers on it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000058029.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2i5nhenp1s08m-3AMW0RGHOD2GCAQ1U005J23E5EYNPA_58029_656526.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a small clean bathroom that is well lit"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000095843.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2idaa63ldiw4q-33LKR6A5KEKBQVCJ6UBRR5R8RCC1TB_95843_432136.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a double decker bus is shown that is not in service"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000163640.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/17/m2z4jrdonitf3h-3B837J3LDOWY40YLFOLGNRDZWRORS7_494188_722293.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a black bird standing on the floor in the street"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000341058.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m15yw127w9r2pl-3U088ZLJVKT3BNVV5GC20KFXPI0W0C_482275_391670.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a bride and groom are cutting a cake as they smile"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000039914.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2wv6wr651h3pc-3PM8NZGV8YG6OG5KWJ23H9KMC54XQ3_39914_665464.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "an adult tries to help a child with her kite"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000145020.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1m1c48prxv1l5-3U088ZLJVKT3BNVV5GC20KFXPI7W0J_145020_531568.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a giant donald duck standing in front of a christmas tree with lots of toys around it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000459757.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3mdukjdqfqibu-39DD6S19JPB6W8FUOAVOKDBJ45AEZA_459757_819108.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a giraffe leaned over in a plush field next to some deer"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000466416.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3a1xconzsjx7b-3QY5DC2MXRKHGNLGDYGML3H4E26UF7_466416_398149.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "large white buildings are lit up in the night"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000235784.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2xu3nxtoffv4y-3PEIJLRY6TTB8D168S199Y280PCXW0_235784_781639.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a women who is walking in the snow on skis"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000123321.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m27cxn5216p42e-3I02618YA06T70AAR1CRXXV8CZ0UPV_33854_235710.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "two very old time cars in front of a group of motorcycles"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000227491.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mtficeak6tfvu-35L9RVQFCOI1UJCM8O4MOOTHQKIUH6_227491_344832.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a skier sitting on a picnic table and eating a doughnut"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000418959.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/4/m19uvxet3ff1nt-3SKRO2GZ71RCNCMWCLXZQE4C732K1C_94185_601320.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "the store front of a teddy bear shop"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000428280.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2zco7272xa5e0-3WMOAN2SRBXTHUNXGC94PUSWQX9VN2_559099_659964.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "cows graze peacefully in a field with a dormant volcano in the distance"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000575243.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m135oskcs2l9nh-36U2A8VAG1Z9DIXZOA4841FMEN2YKD_575243_693760.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a person walking down a street while holding an umbrella"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000060102.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1bee3yy3uph7p-3TUI152ZZBNYYFKRPQHWM0GL78JQ1Y_302760_107392.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a bathroom sink with a marble counter top under a large mirror"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000022705.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2qxdgs16r6jcx-3PIWWX1FJJ6O79WIKP3TTZ0LNCRJJ4_162130_388819.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a park bench placed near an open field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000143931.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1nekyabkdb02q-30ZX6P7VF8VO1DYAD4X1UIE4KK72J3_143931_73568.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a large campaign trailer parked in a parking lot"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000186873.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m34w9n21hmvii1-3S06PH7KSR4496GMK4BCM19MRGQ1DD_237071_576542.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "the young man in the blue shirt is playing tennis"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000066886.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mzoowzr1pwdla-3E47SOBEYQW54K66SF17UU8QJ19IC4_66886_411087.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a girl grabs her hair and holds onto her hairbrush"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000052591.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3zbehjp85if0v-3VHP9MDGRNKLUT4GR7VMRXVY7QSFCN_52591_547003.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a classic black and white photo of a woman holding skis"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000046031.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2v84mgw1shfqm-3ATPCQ38J8A315ODCJRJANCQAS1YAQ_471087_655156.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a man with dark hair a shirt and a tie posing for a picture"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000033638.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1qe0l6hma6k1s-317HQ483I7SOVO3XH5ALA522IV5INK_263403_401575.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a surfer flies off the crest of a wave"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000334719.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mx6dytiz3lwj9-3EFE17QCRC5LF6KIJJ9M9NBT9JRHS3_334719_597757.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "people walking on sand next to a mountainous area"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000494759.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m135oskcs2l9nh-3TU5ZICBRD1G9F4COBQB1A3TKFSQ8M_123321_828325.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "the bowl of soup contains a large amount of broccoli"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000047571.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/11/m228enkx9s6gwe-3AUQQEL7U5TQWYVQF676EIWJMIXV0M_279541_727057.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "several different kinds of pizzas displayed on a buffet line"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000119677.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1z35hh9cgdwtj-3XM0HYN6NKZCI4DO11XMTF95T7QEPD_119677_770082.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a square of cheesecake on a marble cutting board with a twopronged fork"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000324158.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1ctq32sndw7z8-34BBWHLWHABEIICB96186OFTF41WIF_324158_310079.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a man is skate boarding down a path and a dog is running by his side"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000424135.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m135oskcs2l9nh-37FMASSAYCRMUFE0EF5CJA282BSBI0_424135_293943.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a statute built into the side of a building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000212166.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/4/m1or6g3sr7plqv-3N1FSUEFL50L15D57MUEKZGP0RBD4S_227187_792294.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "some green yellow black white and orange birds and some plants"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000318114.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m2me2l9b0hh2zb-3OUYGIZWR7YDREYUTUY78Q71XMZP0F_270474_275450.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a blond girl holding a baseball bat and smiling with her hand over her mouth"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000407868.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m11722d3cdnzvr-3Z4XG4ZF48R0IC5OAPL3VIZML6V8XX_407868_298898.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "this youngster tries to get his kite off the ground"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000151662.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/9/m1eiseruylvcq-3E7TUJ2EGCMMYBJHOHQ517R5LUW9DC_76547_693738.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "there is a man sitting at a table using a lap top"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000473015.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m11gtqron6cr5y-3UOUJI6MTDEYG9C1DSM8RAHBHZDUXS_473015_710539.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "there is a duck that is sitting in the water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000009772.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m28dv2tzcfv0jc-3EA3QWIZ4IVMQ18HE1WXJ48UA5KTIC_304365_644211.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a large long train on a steel track"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000496597.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1l3vo2bi0x1zk-3VJ40NV2QINWMNJKCW92314GNOVOTZ_496597_56616.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a canoe boat with 2 people sailing with mountainous background"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000297562.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/17/m2xvfotbdlxse7-3OUYGIZWR7YDREYUTUY78Q710D2P03_572555_204085.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a yellow and red train stopped at a station"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000186938.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/21/m3wy6mr7osmetv-3H7Z272LX77QOANGINKQ6AZP03CLPO_521231_5370.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a polar bear is standing next to a rock"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000556000.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3LYA37P8IQND0A4OEIQOWQHLXCKKBK_540928_610344.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a cat that is standing on a wood floor"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000458663.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1qe0l6hma6k1s-3QECW5O0KH1AEDD27HD3UV4P2K4T5R_179214_759580.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a small pizza on a paper plate"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000398905.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1q23iu1uev7hk-3EG49X351UCDEYM0HAO5B43VBB36X3_398905_276662.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a man with a beard at sound board in front of microphone"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000083172.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m071506418gb9vo0w5xq3-3LUY3GC63Z0R9PYEETJGN5HO4UEP7B_333745_580115.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a woman holding an umbrella walks past the side of a bus with a large advertisement on the side"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000438226.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2ce6ayeueifei-3IKZ72A5B4G4LX17GX4078PA3O7NFZ_85089_775815.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "people holding up for wineglasses to each other"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000125257.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m11nxcwhi1951x-3U84XHCDICDO46I1Z4JBBRZSRTO4ZS_125257_142036.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a man on a snowboard on a groomed slope"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000142092.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/21/m3wdsboj70yh0d-3ERMJ6L4DYSLOM11MDRO61NT0AA7MB_452321_431879.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "red bus sitting in a bus station parking spot"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000135890.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mwyct2ysfgia9-36W0OB37HWEIGI6WEB3U5G8WWODZHS_135890_103459.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a large building with a tower and a clock"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000255917.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m12xyeia1z554i-33CID57104TJHLITKPNJT7WOVLOL38_255917_198376.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a group of cars on a street with a domed building in the background"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000322352.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/10/m1govpedzbnfni-3K2755HG5S3VZLAUR3QXB3DGSX4FD1_281693_804833.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "an airline with airplanes from one world parked on the air field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000414385.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m26snmxkuuaa4c-3E4GGUZ1T8RJCX4SVPXDVXEHZ91K2E_482585_648663.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "two trains parked on the tracks near a platform"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000479596.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mzgo81a5qd3ux-3M0BCWMB8VW4VAY53W9RK1BGK6SBWG_479596_762440.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a shelf with a lot of assorted colorful fruit for sale"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000491090.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3orxfdski8iux-3ATPCQ38J8A315ODCJRJANCQC0QYAX_491090_174560.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a man is smiling sitting on a motor bike"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000466835.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m25rt7bjo446ds-3VFJCI1K4ZZVEVPUKORZYDUW79EGRH_91406_100857.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "four women stand behind boxes of large pizzas"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000267537.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m2maclcdhqopmp-32Q90QCQ1SLB7DG8O9SEXYN6Z6KKE0_360097_470818.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a pile of luggage sitting up against a white fence"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000090108.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/6/m1ui8b2a4ow670-37XITHEISW9IXJ9PI2VG8M9H763CRW_187990_683834.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a man who is skateboarding on a ramp"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000001818.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m135oskcs2l9nh-37C0GNLMHF3ZGSHJQANTHX03WIHD6D_186873_779382.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a thick evergreen forest marks the boundary of a dark expanse of water on which rests a long boat with packages at the rear and people to the fore several holding long oars"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000047571.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mgi22le4ozdhc-3K772S5NP8BK5NF0IOPHK67E8HMHEK_47571_112343.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "some people sit in a carriage pulled by a horse"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000060102.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m302warakxp89m-339ANSOTR52NNIIUKSI5IDLYS34KIV_60102_173973.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "two opponents are playing during a soccor game"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000394611.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1zad6q0if0k14-37TD41K0AH9UYY92XRVLYWEDTBTSCD_394611_6122.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "two giraffes eat from a tree in the wild"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000094614.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3yw7r6wf80mlf-37UQDCYH6XV4E9VA05SFTX66UFL7VI_94614_117816.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a person in a green jacket skiing down a slope"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000298738.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1bqqrrs1kx103-3Z7EFSHGN9E2UFV1R118X4V58TAXCB_70774_200963.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a motorcycle parked outside of a building with bird cages"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000061268.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3zbehjp85if0v-33OOO72IVHLVDYMHMR0B81AZYMGTCZ_61268_682888.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a train passing through a station in the middle of the day"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000395343.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/3/m15gxvhpg896x2-3PJ71Z61R42S6G358O6A05QW026195_352684_382820.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a man standing in a public area near some other people"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000300341.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mgwuc4vd04ypv-31N2WW6R9RQXHT8XYEKLYUR41J93FJ_300341_574673.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "two college age boys playing wii while others look on"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000032038.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/23/mttfe1bzj9l0v-3Z7VU45IPYH7C7LICM09DTSPF0EZ1S_507667_131876.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "subway map art cow sculpture on the sidewalk"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000001000.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/17/m2xh5432bh1e39-3VHHR074H3H1I4JGM1Q96O840QT7LO_344029_149116.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a black passenger bus navigating paved city streets"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000154358.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m3ewmhxlodd07t-32SVAV9L3F92L2RPNYYTKTGD43HA3Y_439854_391386.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "children are playing near a beach at sunset"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000043737.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2090edhcxim3r-3WYP994K17R2E3CAM6A7UIA4DHZY63_43737_106101.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a snow covered city street lined with small shops"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000117645.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mr2dhrdsb7sjj-3PWWM24LHSYZHW2RM00A5S5Q85H82P_117645_349967.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man riding a snowboard down a snow covered slope"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000364322.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2xuviorh8l09j-3K2755HG5S3VZLAUR3QXB3DGP6KDFU_364322_521632.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a number of sheep gathered together looking around while standing on grass"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000488251.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m12xyeia1z554i-33CUSNVVNNCA3B4G2H9BFJITTH1887_394328_654378.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "this bathroom has red and white walls and a poster"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000426329.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/20/m3ntia6tnion6b-3XUHV3NRVKYK94M77NWEGC11VDA5H8_276018_556974.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a bunch of kids walking through some grass"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000267903.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m14zb6j071e7sw-32SVAV9L3F92L2RPNYYTKTGD4W03AW_312549_109907.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a double street sign at a neighbourhood corner"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000481159.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/21/m3wy6mr7osmetv-3TS1AR6UQQEFIC9K0QNBBB3K6II7F9_27768_221327.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a white and red bus driving down a street"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000408830.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/10/m1g69cpy8gujl5-3PXX5PX6LXY7OXV22RDZ2CEPSO3BAL_273232_677042.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a man and his dog are coming out of the water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000177383.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2p98h3lkipyva-3JJVG1YBEBXAIRJL7VMIU1Y3X585B2_177383_745177.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a train engine is on the railroad tracks"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000122217.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1bee3yy3uph7p-3GD6L00D3SX7D0MZXXAF5RWGXAW1M2_445846_20850.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "an office kitchen with open windows and no food"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000057597.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2vcyzg03qiaqm-39GHHAVOMFRY4RDXHIGPNXB25DYJ46_57597_513091.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "girls face to a soccer ball during a game"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000008762.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m12xyeia1z554i-345LHZDEDXSM0B5NT3FKKUS78EZ3U6_8762_779960.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a night scene with silhouettes of buildings and some traffic lights over a street"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000351823.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2r2ejpxleka10-3TEM0PF1Q5X42HV4OL7C8WQCMNVD0V_351823_123999.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man holding a tennis racquet on a tennis court"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000562121.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/11/m22ctgw7k48f5z-378XPAWRUCDHB598815GOZI2KAGAIB_468925_478255.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "banana pieces placed on beacon and sausage on a white plate"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000052591.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m126sp9gpwcfnl-3PQMUDRV7R6WHNS6KPXT5X6L0X3II2_52591_547297.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a female standing beside a wooden post with a fake background"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000315187.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/10/m1h82eis9ymnsa-3NVC2EB65QZ3HKPS34C9NFYK7UZY3B_3156_746718.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a man fixing a toilet in a black and white photo"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000360393.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/17/m2v84mgw1shfqm-3RANCT1ZVFHR36908WUQ2DQJV3EUBJ_387387_462721.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a school bus sitting in some overgrown grass"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000360393.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1zetzt3yzrdsi-3ZGVPD4G6TH8LRXRE5KJJZBK49XZT6_285788_757985.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "two giraffes and an ostrich at a zoo enclosure habitat"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000325114.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m14zb6j071e7sw-30JNVC0OR9K92Q55RFKHENWSRVQQH7_451084_117340.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a little kid that is doing a skateboard trick in the air"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000383289.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3a1xconzsjx7b-34BBWHLWHABEIICB96186OFTFXFWIF_383289_197715.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a security officer sitting on a fence while talking on a cell phone and holding onto a segway"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000398905.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m34v77cqevjfee-3MB8LZR5BFTPDJQ05VFIATDQ97SKL3_141821_146864.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "there is a very large pizza with different toppings on it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000562561.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mbbzfg0xq3f9x-3YW4XOSQKQLQQ8R86Z11W1DAAR3U1H_562561_120580.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a brown dog sitting in a yard looking at a black and white dog"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000167159.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/6/m1veoii4hrj0th-3AQF3RZ558IWECVFHGIJ5X3OA3FF6Q_67534_622459.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a man wearing a collar shirt with a blue tie"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000138979.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1txs5zz2u16sz-36NEMU28XFD0E122ULZ0YHMAWI2MWC_368961_72345.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "two elephants standing behind a rope in an enclosure"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000400922.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m15gxvhpg896x2-39ZSFO5CA8WXLP7CWO77019W7KRUJ0_442161_682458.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "three people pose with wineglasses held up to their faces"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000245764.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m105cb15dj3kya-3S0TNUHWKTIMK607JPKR1BWU5WMD8J_245764_370654.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a cat laying on a toilet seat with its paws inside"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000001296.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/15/m2o0czquaewmh5-3L4D84MILZSSG31SWP08HMK6DEAJHD_367680_657288.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "several vehicles and a horse drawn cart pull up outside of a building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000527695.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/10/m1govpedzbnfni-3HUTX6F6VUN22OP7THP62EVQNZV2OA_476215_201915.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a black and white photo of a farmer standing next to horses"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000264968.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1bqqrrs1kx103-3KJYX6QCM9BXY4WC1EHCWNBFH7LVJ3_511647_123839.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a man flying a kite above a tall building in a cloudy blue sky"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000497628.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m266b4c0hqg8oj-3Z3ZLGNNSIUU8IY6CUVFATVI8QSQ3K_405306_757833.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "this is a grey cat laying down on a bed"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000172595.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/15/m2obpj1u7bze0m-3OVR4I9USPJFQEHAC80YKYNOMLG4Q1_437331_397676.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "man on surf board riding a wave of green and white"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000235064.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/3/m184qo4gapsoe-3J4Q2Z4UTY3RZG0UME6HFQ6F3K5WQE_472298_721739.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a man and woman riding on top of a red boat"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000314034.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m26qs044w4aqo0-3L2IS5HSFAIT4FYXBMZZT8QDGNEUNO_314034_202181.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "many cows next to a body of water in a field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000361238.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mbtyvccri0g5e-37TRT2X24QRIPQY6WXQCVBH8HB4BJ7_361238_544405.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a woman in a black jacket and a pizza"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000289229.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3ww5rws3x0c66-34Q075JO1XDAAMFS8SNH7UP3XRG10J_289229_254318.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a man in a white lab coat holding a hacksaw"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000437205.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mzgo81a5qd3ux-3P1L2B7AD1P83ABF3N7BHAUZTYHLOB_437205_307250.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a small boy in a blue shirt holding a banana peel"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000109055.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/ms7s6lss73vb7-3DBQWDE4Y6YCJ08IALRRLWYXHXRN5N_109055_626636.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a cat that is sitting in between bikes"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000303499.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m28dv2tzcfv0jc-3MH9DQ757WCNUNHBOIM43OHGBGSGU8_66523_551854.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a bed with white and beige linens and a clear drape"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000119088.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/21/m3a1u9ymfa3llz-3VP0C6EFSGW2KMNW33MW0NNX8UMM6G_544605_750387.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a street sign is pictured against a cement wall"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000488251.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3weajm2ktbouh-3CP1TO84PT1GUEJPO2T7T8VZ6C152Z_488251_478941.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man standing in front of a brown horse near a small building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000052412.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mttfe1bzj9l0v-3QIYRE09Y3HDVI7Z9YPFI6LG5JR1NS_52412_564549.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a jetliner wing flying over the top of a parking"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000132375.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3ewmhxlodd07t-32SVAV9L3F92L2RPNYYTKTGD43HA3Y_132375_570586.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a drink in a mason jar sitting beside a vase of pink flowers"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000177383.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1yyugdozopwof-3HRMW88U16Q76B1H1E6FRMD6V820MT_404249_396718.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a small boy rides a skateboard down the walkway"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000017627.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2pzxt03kotnah-30LB5CDZNCA75I00G3W9ZO9F7KWZ0Y_17627_637893.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "small parking lot of cars in front of a stone building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000194746.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3r37lnf4xpnmu-38F71OA9GTWY3FG74WP0UT0AYZHMFK_194746_85966.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "the pizza is beginning to melt in the oven"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000058705.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3WMOAN2SRBXTHUNXGC94PUSWTRGNVS_58705_106845.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a man wearing a helmet while holding a banana next to a woman wearing a helmet"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000220764.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1govpedzbnfni-3YW4XOSQKQLQQ8R86Z11W1DADNHU1Q_220764_324116.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a person holding a pole in each hand in the snow"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000362682.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1qywd3fb9gx9r-3483FV8BEEJCDIJ3TL3CLE9Z183628_493442_577075.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a woman is skiing down a snowy hill"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000571804.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m15yw127w9r2pl-3LJ7UR74RHDUNH4IWG8HG4SJEGJN4W_166478_377413.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a young guy using his computer and talking on his phone"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000334309.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/23/ms7s6lss73vb7-3AAPLD8UCCHMU6XX8JECJS7F7TGHTH_392818_490564.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a small dog wearing a flower print life vest"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000060770.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m15ds0tlsdbhmx-3RRCEFRB7MCSM8FLTQ38P9SPWOEB4X_60770_336941.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "two zebras in an open field with grass"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000052412.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1xd71ue0ctbw3-3OB0CAO74HPIBSWRIP1BR9ZUPT3YHR_52412_569628.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a big parking with a bunch of cars parked in it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000173302.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1t6bhidokz0q4-3PS7W85Z8Z2TF0FEA7QK6B9RPCR9TV_550322_243468.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a collection of kitchen utensils in a metal bowl"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000343934.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m29trjokxf9t8s-3SLE99ER0ND8P53AEJTYJ55YY55ZBU_553339_771132.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a road that has signs trees and telephone poles near it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000104572.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m34v77cqevjfee-33IZTU6J811I1WKXONR8C9ZNXAMXSQ_104572_449674.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a bath room with sinks and mirrors"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000431545.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1ctq32sndw7z8-3TY7ZAOG5FKCGN1EG2TZC9L269L0KE_431545_570476.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a close up of a baseball player running on the base paths"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000426329.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m12w3un4iorg5w-34PGFRQONOBADTWHROM8IPUTRFIWJA_426329_669601.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "some frosted cakes with little decorations on top in a display case for sale"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000496597.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3722zayhgw89f-351SEKWQS0H15V2BDQUA6TXPL4CDMA_496597_49092.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "two people sitting on a small boat floating over the ocean"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000214205.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m11fk4htnnvxvd-3TUI152ZZBNYYFKRPQHWM0GLA1EQ1I_214205_407939.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "an airplane flying high with a full moon in the background"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000001818.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/12/m245obzpgptgkj-308XBLVESI4ZNEHJ42ZLX23ZPNXBRM_555009_243705.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a white desk with three computers on it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000475678.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/17/m2vcyzg03qiaqm-3M68NM076H7TH20O8BIDMMH7E4YR6W_64898_322003.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "two kids with red hair riding surfboards onto the beach"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000237984.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3QEMNNSB2XZIKSVO9KSLNB0ZT577D2_237984_500255.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a old green bench with a green chair beside it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000499768.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2c6x9xwezg7qn-3DR23U6WE5EAAW6VII66YPA2G3ETEK_137727_167100.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a cargo truck is loading a train with luggage"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000431727.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mruw3hnyyj6s6-3BXQMRHWKZYNMXDXZLKZGT1D81CUM1_431727_148529.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a pair of bears climb up in a tree"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000527695.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m146b9i98su2qf-3NPI0JQDAO5E7NVLRWMHS196UXZPTU_527695_367963.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "the broccoli platter in the dish is mostly eaten up"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000210394.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/11/m228enkx9s6gwe-35GCEFQ6I5O3MAZXZQES1M7MACD3ZQ_142472_691684.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a bunch of pedestrians as well as cars are on a busy city street"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000229858.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3TU5ZICBRD1G9F4COBQB1A3TLWNQ8G_229858_178866.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a deer standing next to a small deer in a forest"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000154004.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m27cxn5216p42e-32KTQ2V7RDFP25PU1AP8KXEZU9LM96_154004_651406.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "several people are shown sitting in the sand at the beach"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000290293.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/3/m15gxvhpg896x2-3OLF68YTN91X1E7I7TZVL25A7T6FAW_77460_308116.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a group of people in a field with various kites in the sky"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000498709.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m11odf5ma5mtw4-3PH3VY7DJLX0D81L9APU6JIJ83QZW7_498709_592825.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a view of a motorcycle the picture is very close"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000008762.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/6/m1ui8b2a4ow670-3RKNTXVS3MYN3YATNC3CUY8JIMD4AF_27620_400508.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a glass desk and chair with a laptop and keyboard"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000431545.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m11gtqron6cr5y-3R2UR8A0IAG334S4H6AZS4PMZAFXO2_431545_574262.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a baseball runner slows as he arrives at third base"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000389566.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3RU7GD8VPOTDSNI62WHRIWDKSEDSP0_389566_610865.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "the encompassing of an outside town in the picture"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000008762.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/21/m3terdvby3q89l-3KMS4QQVK2Q3DRG4JKCKFG869D9KF2_279927_613731.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a large crowd gathered watching another smaller crowd"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000466567.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/23/ms7s6lss73vb7-324G5B4FB38OL8UUX84Q84GGWM107E_468954_644748.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a man and woman are playing a video game"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000407825.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m102x1yz1jcso5-30ZX6P7VF8VO1DYAD4X1UIE4O4X2J1_407825_579204.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a clock shaped like an apple sitting on a counter"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000328030.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/15/m2p98h3lkipyva-3Z7EFSHGN9E2UFV1R118X4V575EXC2_172547_19519.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "some animals are grazing in a large field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000281032.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2p98h3lkipyva-39L1G8WVWQR6REEPRF509SNAXVC319_281032_165219.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "an elderly person in a kitchen cooking food"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000167159.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m34w9n21hmvii1-39GXDJN2OTE8EN0TAAKL7X215588VZ_167159_573887.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a vintage baby doll with the book goody two shoes on its lap"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000017627.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m331w3l1lg1x6s-3T3IWE1XG6NZ7ZW0R916S6ZGYOHQT7_17627_637716.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a green car has parked on the curb in a parking lot"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000144300.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3wmv085v9ad0l-3U5JL4WY5K9ZZBIMYMVQE6JHIQTX4K_144300_633041.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a parked red motor bike on a lot"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000319369.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m3e9bhy70rradp-30BXRYBRP4XEMN1R3OJN6CEEFRQWHQ_475904_454783.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a white duck is swimming in the water near a boat"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000009891.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1f3nsbh8a5wzu-3KRVW3HTZNL77KLTAGPZ1SJPQ09MSQ_9891_755779.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "man unloading luggage onto an airport cart from a car"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000464144.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m4ini7rhcmelq-39U1BHVTDLR70YQYTYPAFY65SWCT3K_464144_571115.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a skier is skiing in the winter snow outdoors"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000090108.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m126sp9gpwcfnl-3JBT3HLQF82ATZ4KXOBZEPANIOVPZD_194746_86743.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a pizza in an oven that had a lot of cheese melted off of it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000481159.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1ykqhcasseixf-3EJJQNKU9R59ERK54U9HJD6XK1PHRX_481159_5065.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a horse looking over a fence on a snowy day"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000177383.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m21vymau5be9ct-3R5F3LQFV2KS8ZSJIOYKPP5X79KZO6_177383_751949.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "yellow and brown train on the tracks looks it has eyes"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000284282.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m3cvj8hshmpgqj-3A1COHJ8NJV3WM595L3HSWBSU6A8HF_349302_71888.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "two giraffes standing under a tree to get some shade"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000353027.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m123vkh8qsutm6-3LKC68YZ3A3OE4NE71EDQLBFR4RWOJ_353027_147133.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man pulling a slice of pizza away from a pizza"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000414340.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1txs5zz2u16sz-37WLF8U1WPQ7UYNTIR9GLAVTJ6E6KG_161008_308082.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a pair of scissors sitting on a wooden box on a table"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000420840.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1yvvtrqwasgug-32RIADZISS4R3UWN4CP3FY2P4ACS4R_545007_663289.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a clock tower with a statue on top of it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000497628.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/ms7s6lss73vb7-3WOKGM4L71GV6EMTEULSJQ2BMHOO0H_497628_316416.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "the bed in the room is clean and ready for its guests"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000551822.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m15gxvhpg896x2-3JRJSWSMQHL5BF81330FGC64M433EF_551822_633418.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a sandwich and sauce on a white plate"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000042889.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2mb6kxtkcv0c6-3U8YCDAGXPGYRQZ9T7DW8VXB52UQ0N_42889_482470.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a very cute teddy bear with glasses and some items"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000527695.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3thsn38eer3vh-3QY7M81QH7MQYY1Y11E8ROVPDAH7KK_527695_365179.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a white and blue plate full of broccoli"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000042889.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/20/m3orxfdski8iux-3OS4RQUCR9F2KOCAD7ZRGDPGLEOFBQ_47112_31204.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a dining table has a large pizza and wine glasses"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000364884.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m27mw750k9y9wt-34Q075JO1XDAAMFS8SNH7UP3UPU01P_229216_382042.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "two women sitting on a ledge deep in thought"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000319534.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/23/mwfcp9k44s495-3EO896NRAWVILCSNWJHXV5NUNNEJTP_349860_410146.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a man has jumped a ramp with his skateboard"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000232649.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3LO69W1SU3DKBXUHF4U3C1LX2K5GLE_232649_285789.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a blue toilet sitting next to a bathroom sink in a bathroom"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000185802.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m10ezttvtjtzv-3NL0RFNU0FNTFBJFZ3G1OCBFJI1K44_185802_281255.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a giant banana is sitting in beach chair which is overlooking turquoise water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000318114.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3z7gko9hnu6gd-3HWRJOOET529VWTGTI1GOKEGH45SE6_318114_172946.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "banana bread and orange juice with a orange wedge is sweet tasting"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000420840.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2dt2jj9kkp38l-33LKR6A5KEKBQVCJ6UBRR5R8U8IT14_33707_376285.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "two giraffes stand together near rocks and a building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000510329.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/20/m37a30ixxtq8cf-37C0GNLMHF3ZGSHJQANTHX03Z1YD6Z_39551_756091.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "the tennis player wearing a purple outfit is about to hit the tennis ball"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000453860.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/20/m37a30ixxtq8cf-30BUDKLTXDV8PUV5HMO8V94O37B5E5_236730_306543.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a zebra standing on a dry dirt lot"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000331799.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3GA6AFUKOOOHVPZ3T48L1F3HHY53H0_331799_721206.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a woman standing in front of a box handing a woman a bag of food"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000414385.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/6/m1y0p7kxxra2oq-3EQHHY4HQSS698RWNHEWVCP0CXKG57_300039_674621.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a woman takes a picture of the beer on the table"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000528862.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m15yw127w9r2pl-3CFJTT4SXTQZS3BA1YTWE8GCJMJI75_528862_338615.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "several giraffes spending time in an enclosed grassy area"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000255917.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/mid2t6rds5niw-30H4UDGLT2IAUS51IZL56NEE08RPMJ_491497_394668.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a pillow covered reading chair in the corner of the living room"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000128658.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m304gvmq051v89-3EG49X351UCDEYM0HAO5B43VF7LX68_128658_420106.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a plate has a waffle some fruit and ice cream on it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000412894.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m5noeebo180ms-3OWEPKL089CR64M1YE6OJGCT50KN7R_529122_752654.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a boy and a woman are in front of a counter"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000041633.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mg8gbyx2u711d-3JPSL1DZ5SZ9P6KZ5HI8T7EJ69ANA4_41633_3872.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a burgundy lowrider showquality hot rod chevy truck"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000552902.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1z6dx4taf02w4-37TD41K0AH9UYY92XRVLYWEDSRSSC7_552902_701191.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a zebra eating grass in a grassy yard"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000099810.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1i6emre6n6j97-31Z0PCVWUKFP1HRLVAS0SFILWP9T7H_99810_767578.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "small certain was child eating donuts off a blue plate"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000431545.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m2l4ufdlb76up1-3S3AMIZX3U5OW9QK010W3XSDVJ7DCH_479732_115753.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "vegetables spill out of a sandwich sitting in a box"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000052591.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/10/m1h82eis9ymnsa-3ZAK8W07I4EQJJ6QAGX1IWU080KU06_301061_85015.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a person is moving green hay towards an elephant that is inside the back of a white truck"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000196185.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m1xjh0o5h4dyeo-3LOZAJ85YDDPWX3ZU8C2LRMJW2J2XT_250127_658929.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a young woman is waiting in the rain"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000340175.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1uh1oxza5h2nq-33L7PJKHCGYT1VW45FK66FEG4BQ8TV_340175_558187.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a living room with a big table next to a book shelf"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000426166.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mx6dytiz3lwj9-3GLB5JMZFXV1DL6P2NWFTODSS40DGD_426166_512313.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a bicycle that is tied to a post along a sidewalk"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000058705.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m15yw127w9r2pl-3TMSXRD2X603ICGD1ZG2EPY7VPZ1W8_507042_692812.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "three giraffes looking in different directions from each other"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000405970.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2pdypl5t2gjo2-3WMINLGALB3QY2N8GR9UDWK79BQACH_405970_494297.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a large living room with a wall painted green"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000320232.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2zco7272xa5e0-3I7DHKZYGN00V8V1CX8EDRPOBJOF5D_180878_622639.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a table with a happy birthday message holds several items that were probably birthday gifts"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000047571.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m16gihrh9er9rx-3137ONMDKG565RKPYI7ZZ6VDQWWEGW_47571_120515.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a horse drawn carriage being pulled through the mud"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000090108.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m1l28zgji06fng-32AT8R96GL9QKSQ2JIGLS20J0AWSUH_151938_379709.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a lone woman huddles under her umbrella at a picnic table in the rain"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000447465.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3DOCMVPBTNEG9PEOELNDFDYJ8WBNNP_447465_63260.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a person on skis with ski poles standing at the top of a hill"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000117525.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2lka0rttqeb4v-3QY5DC2MXRKHGNLGDYGML3H4ET6UFP_117525_180055.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man in front of a christmas tree with his dog"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000046031.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3ntnpq1hor19g-3QBD8R3Z21JC524UXV8UVTSO49M4OS_46031_749580.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a laptop screen displays an image of bright green leaves"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000127624.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1bee3yy3uph7p-3PM8NZGV8YG6OG5KWJ23H9KMBDCXQQ_127624_342542.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a train next to a train station with city in background"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000032735.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2e29njkb7ltr3-3XIQGXAUMC8WIY050KTXHI3RA8CX7U_32735_406786.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a person jumping a pair of skis in the air"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000526392.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m2me2l9b0hh2zb-3ND9UOO81K2G8JKHU9YLY47LTEILWE_430377_536592.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a lady that is on some skies on some snow"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000511453.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/12/m25rt7bjo446ds-3DHE4R9OCWBEAB895YCYX5MOTTN2GZ_499775_219254.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a yellow and white bus a red car a street and some houses"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000417465.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/3/m19068sc13ac4v-3JWH6J9I9SDE8GPRBVI4OIYPO82BNS_266409_537203.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a man that has skis and is standing in the snow"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000493613.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/ms0xhqb7av0tr-3GS6S824SQX6WJN26ME0BWBA179NW8_493613_570775.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a pair of shoes flips over a skateboard on a road"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000262938.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/4/m1or6g3sr7plqv-36V4Q8R5ZK0VU46GIC0U6S4O5KBMQ1_427649_124060.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a boat on a still lake in front of a mountain"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000151662.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/4/m1mv2s7vjjv8gv-3IAS3U3I0FGIJU0YPCKKWUVRDBSB2E_509131_58562.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "apples and oranges and a tree with bananas in front of potted plants"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000297698.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1txs5zz2u16sz-3EQHHY4HQSS698RWNHEWVCP0GZK5G4_99428_543864.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a laptop with some other electronics on top of it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000504389.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mr2dhrdsb7sjj-3MMN5BL1WZ43N30EK1OYTKBDYC23MR_504389_588108.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "adults enjoying game of badminton on grassy field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000494759.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/9/m1ctq32sndw7z8-3TDXMTX3CBUGO3X5IOO48UGGR8OI6S_159282_541200.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a glass vase with some flower coming out of it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000266981.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1bqqrrs1kx103-31Q0U3WYDPFOSXFCTR8QM9ZQ5BU173_266981_603190.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "outside view of the mgm grand in las vegas with people sitting and walking"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000313034.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m29l7oow4wnodg-3SNLUL3WO4N3GFVCZ9JKGGMOA8DULG_313034_473001.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a woman is talking to a man and holding a plate with a piece of cake"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000370478.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/4/m1or6g3sr7plqv-3WEV0KO0OMS43QFGXNQWI2WVEU5SDF_542089_156719.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a bathroom is outfitted with brown and white"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000243626.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m32mlmn3m00iqr-3XXU1SWE8MV64ASS4BGAIVI6YW80AB_455597_503680.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a person in a kitchen with a stove some pots and pans"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000391648.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12e12lh0tbjh9-339ANSOTR52NNIIUKSI5IDLYS2EKI3_61333_89537.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "an image of a cat sitting on the bed with a blanket"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000152465.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m4ssqr7fl6wre-32XVDSJFPZXE2L4VG1ATAA6LS3ZM26_152465_776996.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "an old fire hydrant with a bicycle tire on the ground around it on the side of a road"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000479155.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/14/m2mb6kxtkcv0c6-3ZQIG0FLQEGJ4OWB8D0RLD5NNZ8WV9_204329_78339.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a surf board in the sand near a body of water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000009891.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3JBT3HLQF82ATZ4KXOBZEPANH08PZD_9891_752894.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "the man is loading luggage suitcases onto the cart in the parking lot"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000377588.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3otqbq7nctek7-3TVSS0C0E104RWSM3I0CKVH2N7YWTI_377588_307926.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "pitcher in a baseball game tries to out a player attempting to steal base"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000347664.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/11/m2251j4l0421hq-3NVC2EB65QZ3HKPS34C9NFYK7N5Y33_304560_79596.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a black cat is staring directly into the camera"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000560911.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1z21u2zkfi2h9-3N1FSUEFL50L15D57MUEKZGP0RC4DK_560911_449311.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man sitting on a black leather couch messing with a phone"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000223130.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3a1xconzsjx7b-3TYCR1GOTCJK2EPM5WISX813AGNLZ5_223130_710231.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a giraffe that is standing in the grass"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000521141.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/10/m1k2zdm5npnlr8-3KOPY89HM82DMVUTH4B6DSJWQLK3J2_455872_412942.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a black and white image of a shipyard with some boats"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000384350.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2fu9oshs147yl-30BUDKLTXDV8PUV5HMO8V94O3JSE5J_384350_479894.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a red and white plane sitting on a runway"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000504389.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/3/m19068sc13ac4v-3MH9DQ757WCNUNHBOIM43OHGBOAUGK_101884_748826.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "the large brick building covered in vines has a clock on it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000006471.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m101ooe1o3phic-39LNWE0K4UW1QEN6E6M7O0UWRA8UI1_6471_743219.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a baseball player in a batting stance at home plate with the catcher in position to catch a pitch"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000560911.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1qywd3fb9gx9r-3JPSL1DZ5SZ9P6KZ5HI8T7EJ80KANL_77595_208678.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "an orange cat with pointy ears is on a bed with a laptop computer nearby"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000001761.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1cqc64q22fzgl-3W8CV64QJ2Z3AROJAD9VSS63HWDH95_118515_19289.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a spotted kitten sitting on a wooden bench"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000473015.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/10/m1h82eis9ymnsa-3LKC68YZ3A3OE4NE71EDQLBFR6AWO6_473821_543341.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a room with a chair couch mirror and lamp"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000049259.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1zad6q0if0k14-30IQTZXKAK6I0N0LFL8O23T4Z100X2_404479_560658.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a large airplane is ascending from the runway"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000288584.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1rmj47bqy7r2j-3P1L2B7AD1P83ABF3N7BHAUZS18LO7_376278_109896.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a zebra lays down in a leafy field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000257478.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m15gxvhpg896x2-3XIQGXAUMC8WIY050KTXHI3RAU97X9_257478_408295.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man in white baseball uniform on a grassy baseball field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000044652.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1tfrwjf2efim3-3Y4W8Q93LZKKVIWKRIV1TPG2CZ0VDP_44652_24695.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a flying plane with smoke trailing behind it"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000303908.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/17/m2v84mgw1shfqm-3R08VXYT7CVHTYVFQFXBFZPKX1F7W6_29675_191552.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a close up of three hot dogs with mustard"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000289229.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/me1wlqd3fuivx-3O6CYIULED1JR97B3NQMPR348TEUWY_289229_256511.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "the male doctor is holding a medical saw"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000314034.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3pkseuizpn2qf-3J4Q2Z4UTY3RZG0UME6HFQ6F5G6QW3_314034_204416.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a herd of cattle standing in grassy area next to water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000362682.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/miw7lhr05hyjm-3LRKMWOKB5HE1S3EVQ0NIC2U6Q32ZQ_50896_583968.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a big bowl of oranges that is on the floor"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000144300.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/17/m2xvfotbdlxse7-3QHK8ZVMIMIOKGM6ZI2PBQU0M7SBL7_561366_111220.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a cell phone on top of a calculator near a computer keyboard"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000447465.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m4ini7rhcmelq-39K0FND3AHF37OZZTG38GSPJ4ZLMAN_447465_65594.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a person on skis standing behind a starting gate"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000140987.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3eqvhcg4imwxw-3L4D84MILZSSG31SWP08HMK6CO8JHU_140987_212730.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a smiling girl has a small plate of pizza"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000047801.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/3/m15gxvhpg896x2-3TMSXRD2X603ICGD1ZG2EPY7VU6W1K_407524_651863.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "pasta with sauce and broccoli on white plate"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000479596.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m3bt1xfw6pzsnj-3E7TUJ2EGCMMYBJHOHQ517R5L50D96_24144_225817.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a cheese pizza cut into eight pieces in the box"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000214720.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mjmm9dil7zpyy-3T111IHZ5EQGZL2M6ILP6W893X5R9P_214720_568427.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man at home posing with a cake"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000014007.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/4/m1or6g3sr7plqv-3KRVW3HTZNL77KLTAGPZ1SJPNL5SMV_492905_750444.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a desk sitting on saw horses with a monitor keyboard and mouse"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000173302.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1398enww3zl0i-3QY5DC2MXRKHGNLGDYGML3H4E2FUFG_173302_275974.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a busy kitchen next to a dining table and counters with chairs and tables"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000315257.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1cdvspzow0dxw-3DR23U6WE5EAAW6VII66YPA2J83TEM_315257_721763.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a blue bird sitting on top of a bowl of seed"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000334309.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2i19tl03ql7q9-3UNH76FOCS5436X29MHOJE7Q7EAMYV_542625_431170.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "outside photo of parked cars with a blackbird sitting on a parking meter"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000320232.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1kheo0xvkyrqt-3OUYGIZWR7YDREYUTUY78Q71X5PP07_320232_462707.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "the roads are wet near a public parking booth"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000483531.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m12xyeia1z554i-3LS2AMNW5FQJU7RSVSF7YWTT3H4OQT_358427_52738.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a piece of broccoli on a metal fork"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000152465.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m3bt1xfw6pzsnj-3E7TUJ2EGCMMYBJHOHQ517R5L50D96_24144_225817.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a cheese pizza cut into eight pieces in the box"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000458663.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1t315vvgx7oip-3LKC68YZ3A3OE4NE71EDQLBFR9ROWL_458663_815970.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a kitchen and dining room with an ocean side view"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000112997.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m364yza8zcucsd-3YWRV122CSZ818BT5KAGBB0IQLKU8Q_170191_468028.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a young man wearing an animal print robe next to a white bed"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000022705.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m11eivx3y0ffxm-3WI0P0II61SS2BF3IYQRSSRZVB4DRZ_22705_540580.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a woman is standing and smiling as she holds a wine glass in front of a refrigerator"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000210394.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3R3YRB5GRF3MKW482IUU726DHGJAUO_210394_570037.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a bunch of people who are standing outside a bus"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000531707.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m34huwaokmi0r3-388U7OUMF71Y3RE96M2WDL7G0BI0RT_553731_773648.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a man dressed as a clown talking on his phone"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000052412.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2maclcdhqopmp-3OONKJ5DKCJWQ1P3CAIH37YUGECOBJ_52412_564657.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "view from an airplane looking looking out over large parking lot"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000220764.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m34v77cqevjfee-3OWEPKL089CR64M1YE6OJGCT8IXN77_383443_255502.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a bathroom with a white tub and white cabinets has a black pattern on the floor"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000467776.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/4/m1np82auxxqj4s-3GNCZX450IN9SRWCLWV0XWJE6O1PAS_234807_739934.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "horses are standing in the grass near a fence and stone structure"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000009448.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3PMBY0YE273CTJD3OLVUH19N42P9C1_9448_802608.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a little girl holds up a big blue umbrella"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000217400.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/12/m253dwe1ylfsrg-3VAR3R6G1P1DO3RW9YY7C58T9YBO8L_514797_783824.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "at the seawall the gentlemen are holding up the kite"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000054967.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/15/m2p0euieqzigaq-308Q0PEVB8D36MZ3GVRGS47IHW69IE_275749_367351.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "small gathering of people at a friends house for drinks"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000506279.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/4/m19fp2v8506m4k-3907X2AHF05KNO188H2LRE0GW4XP2J_308753_651692.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a cave located on the side of a mountain side"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000032038.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2fux05bfkoyzb-3PMBY0YE273CTJD3OLVUH19N1Q59CQ_474078_285354.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a baseball player swinging a bat a a baseball"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000097022.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/5/m1t6bhidokz0q4-3VNXK88KKCI8SSJ3F2QZ7TXCLI1V9W_466125_767454.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a little girl that is standing under an umbrella"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000512330.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m228enkx9s6gwe-3OE22WJIGIOE7CBPRERGX2LILGVQU1_512330_572844.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "an open refrigerator filled with lots of food"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000105455.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2p98h3lkipyva-32KTQ2V7RDFP25PU1AP8KXEZUFE9MY_105455_103633.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a large red clock tower is on a street sidewalk"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000408830.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/19/m32pjode5tlsr3-3NQL1CS15R8NTTRBNEIF6AQ3UL9VYW_7511_159209.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a guys stands in the sand playing with a kite"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000147745.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3mu2kfdnj4txh-35GMH2SV3EHUX41NM1K74F4FHUEEOY_147745_801635.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "two commercial trucks trucks parked next to each other"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000453860.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m19rw7skrmc7s9-3TU5ZICBRD1G9F4COBQB1A3TLZ3Q82_453860_482520.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a couple of pieces of very nice looking luggage"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000292908.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1tfrwjf2efim3-3OE22WJIGIOE7CBPRERGX2LILPOUQG_292908_453018.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a person on a surfboard riding a wave in the ocean"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000043737.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m28dv2tzcfv0jc-3FQ5JJ512LOF1JTLH8VXNLHFA11NKB_151857_446945.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "an old train is going down the railroad underpass"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000442456.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1gwaqytsed8ar-3WQQ9FUS6AUT592N5TTL6L8U2DM8BN_442456_392517.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "man in red shirt and black pants standing next to white flatbed truck cab next to a set of new tires"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000394328.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m12xyeia1z554i-33CUSNVVNNCA3B4G2H9BFJITTH1887_394328_654378.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "this bathroom has red and white walls and a poster"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000132375.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m15gxvhpg896x2-37UEWGM5HT8YAGZR256WRQQQ5K0R1X_132375_566653.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a glass vase with three pink flowers and a drink"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000431545.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/ms7s6lss73vb7-3TXD01ZLD4H7I7ORTHK3UZKDO3Q4U3_431545_570440.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a man that is standing next to a base"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000060102.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1wp8frbib5dxy-3NS0A6KXC484GMBLU5LCKGWVDQZZGS_60102_174141.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a man hitting a soccer ball with his head"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000542856.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/18/m2dt2jj9kkp38l-3A1COHJ8NJV3WM595L3HSWBSXKK8HK_115245_233493.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "luggage and coats lined up at portable tables"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000288042.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1398enww3zl0i-352YTHGROVD2DLI7TID4BKVNV444H1_288042_306265.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a man walking with an umbrella near a railing above a river"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000461405.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m1yqqk4y9sb8nj-3K2755HG5S3VZLAUR3QXB3DGPIRDFP_461405_238165.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "there is a group of sheep grazing on the grass in the field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000234366.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m123vkh8qsutm6-3VHHR074H3H1I4JGM1Q96O8439N7LN_119088_2591.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "two men playing frisbee in the water one man walking away"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000095843.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m3cvj8hshmpgqj-3A1COHJ8NJV3WM595L3HSWBSU6A8HF_349302_71888.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "two giraffes standing under a tree to get some shade"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000219283.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/me1wlqd3fuivx-3PH3VY7DJLX0D81L9APU6JIJBC2WZ1_219283_705485.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a teddy bear is sitting in front a group of boxes"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000009891.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m14bcecxti4bu6-37UQDCYH6XV4E9VA05SFTX66R047V4_283520_441729.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "two people standing in front of some trees"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000068409.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/7/m1yqqk4y9sb8nj-3U0SRXB7CD51ON6G7S4UOX3T5S5NRC_221017_596976.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a rooster is walking on a grassy beach"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000122217.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/13/m27mw750k9y9wt-3OS46CRSLFZLA9H5HVNNNJ6AWWN6VL_313130_323756.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a black and silver fridge next to a large mirror"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000047801.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/17/m2xvfotbdlxse7-3H0W84IWBK2XUHT3ET1BYA9KUCMERN_226592_159972.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a bedroom done in animal prints with red and black pillows"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000539883.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m15by8wmv40jh7-3ZPBJO59KP1FDH10MTEXEOPT8B2HDK_424545_393496.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a striped cat sleeping on a sunny window sill"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000057597.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m123vkh8qsutm6-3LPW2N6LKT2PEQSR7M3DZRQRPUP5U4_412894_67910.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "people cross the street at the corner of luxury buildings"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000447465.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2pzxt03kotnah-33FBRBDW6OZCFEUTJTPGH5ECGWGC8K_447465_57950.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "skiers line up for the starting gate at a race"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000481159.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/23/mrfj47f07serc-30JNVC0OR9K92Q55RFKHENWSOVUQH8_437392_69484.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a toilet that is decorated like a pink stuffed animal"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000325114.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/11/m2196iozplgxre-3U4J9857OEBP5VX4VO1FSLCZQG8B74_459887_274082.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a group of slightly bruised bananas in a bowl"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000217400.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/23/mn98ese280fk5-3JAOYWH7VI45WN791YOTKKZAYDN9L5_185409_372522.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a herd of zebra in a grass field"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000172877.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/22/m3c4ozifbjg1y7-3P529IW9KYLEXXYM4KOLUGLLXDCLF3_255401_279532.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "this bathroom which includes a bidet is very dirty"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000497628.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3a1xconzsjx7b-3018Q3ZVOIQU44CRYOZP2RJTAH9ARE_497628_314733.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a bedroom with a bed that has an ornate frame"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000110999.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/2/m15yw127w9r2pl-31EUONYN2V3BZFN9HR9H4JS1KWIVO3_414673_99303.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "boys playing baseball with one little boy at bat and parents watching"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000473015.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/12/m253dwe1ylfsrg-3U5JL4WY5K9ZZBIMYMVQE6JHG9OX4F_481386_472062.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a person behind a table in front of many dishes"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000055022.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/12/m253dwe1ylfsrg-35USIKEBNRG8248WF41WD2CXD7CN67_451144_551987.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "two skiers looking out at the view from a high cliff"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000224724.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m304gvmq051v89-3F1567XTNW5GNKNMT3WP16Y45YOQ9G_224724_215245.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a chinese street showing business signs hanging over the store fronts"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000551439.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mlmtyp4dfhq89-3UJ1CZ6IZHP9ZD0NIH8DKQ8337QS5G_551439_439590.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a woman holding a red book in her hand while sitting on a bed"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000402519.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m1s5gapfhi13yy-3KB8R4ZV1E78YO85OR59YYPSMN4GBU_379476_83846.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "the table is very long and dimly lit"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000384350.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3XCC1ODXDLBMRKJ8NKWBSRYYB8QRQC_384350_478499.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a large jetliner sitting on top of an airport runway"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000088432.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m3sidptmgddzw6-3IRIK4HM3AKPB0B5QESIXRILE2X6C5_88432_290996.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "the street corner of a busy intersection in a town"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000394611.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m2k9lt1omqck4n-3PS7W85Z8Z2TF0FEA7QK6B9RPTG9TI_394611_10427.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "giraffes are grazing in a field of grass"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000117719.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/my36w1vkw4eo1-3H7XDTSHKCR0MR6GJI15LC2DWY3WGR_117719_5674.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man sits at a bar in front of dozens of bottles of alcohol"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000375078.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m19cmjjvtwshyv-32Q90QCQ1SLB7DG8O9SEXYN60WZEKQ_375078_13801.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a dried black flower in a long tall black white vase"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000104572.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m2v84mgw1shfqm-3IFS6Q0HJIJLB1V2P1R79ZC86FASIA_104572_454450.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "this is a commercial bathroom with orange and gray mosaic tiles and three sinks"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000351823.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m12xyeia1z554i-3MAOD8E57QALLJSSGRAMDFPHAYINXI_440171_625234.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a man riding on the back of an elephant along a dirt road"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000301563.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1bqqrrs1kx103-3KV0LJBBH2LV65LGVRPUBIET1NORMG_109827_87710.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a brown teddy bear sitting up against a tree"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000235064.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/my36w1vkw4eo1-3MHW492WW0DNZC2X4BU4CF3WL11VMS_235064_318363.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "the big bear is outside walking across the concrete wall"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000389566.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m37a30ixxtq8cf-3A4NIXBJ76ZK37Q3Q7SYP65UY6GMLR_389566_619853.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a herd of sheep are blocking the road"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000143998.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3RGU30DZTA8E8H23NMGL9RS2TXOMJX_143998_665020.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a group of carrots with an organically grown strip tag around them"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000194746.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m3a1u9ymfa3llz-352YTHGROVD2DLI7TID4BKVNVY24HN_194746_87916.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a food being cooked in an oven that has overflown"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000175535.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1h82eis9ymnsa-3NVC2EB65QZ3HKPS34C9NFYK7UZY3B_175535_428590.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a white plate topped with two pieces of food next to white dishes"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000201418.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m1h82eis9ymnsa-3KYQYYSHYV7P5YNNQ69NYXMUJV4ODY_541952_166067.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a large clock tower on top of a church building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000073533.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m135oskcs2l9nh-3NGMS9VZTLISX5OQI9LNZ3G11HRFFU_73533_775892.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "a couple of kids in yellow shirts are sitting together"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000174123.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m1m3v5ujoy8m7j-3XLBSAQ9Z4CLNT0K12HIE3J2ETQZ7B_24243_281410.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a woman with a beer sits inside a refrigerator on the footpath"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000510329.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/mg1iftj4y6ebj-3UNH76FOCS5436X29MHOJE7Q49QMYY_505451_362353.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a person on surfboard riding on a small wave"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000288042.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m1nm1thr140ao7-3BWI6RSP7G9NCY9O5F35TTIKUCVE7G_288042_303385.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "a man standing on a street with a umbrella"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000394328.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m317e7kgfce8k1-3WJ1OXY92AGOMZXVZFTPBAOOHY1A8X_394328_640719.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "there is an indoor toilet underneath a sign that says please flush"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000528862.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/mgzuigxjkr7at-33CID57104TJHLITKPNJT7WOV7OL3G_528862_331073.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "Yes",
        "text": "in a large area many giraffes stand behind a fenced area"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000201418.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/mzoowzr1pwdla-3NQL1CS15R8NTTRBNEIF6AQ3V65VYZ_201418_624278.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "adult standing at luggage carousel in airport area"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000378116.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m6mn1a0gkn7bu-3NL0RFNU0FNTFBJFZ3G1OCBFNF94KU_378116_464649.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a person riding a wave on top of a surfboard"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000017627.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m135oskcs2l9nh-3C8HJ7UOP7U48W7758J7XLGSALDZML_17627_634836.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "Yes",
        "text": "several cars are parked in front of a building"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000032570.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/0/m4ini7rhcmelq-3WR9XG3T63B5KWCVG9RSK76TGB074D_32570_252179.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "a man on a surfboard inside a large wave"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000432085.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/24/m12e12lh0tbjh9-3XCC1ODXDLBMRKJ8NKWBSRYYERGQR6_407298_409595.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a young ball player poses with his baseball glove"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000123321.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m135oskcs2l9nh-3TU5ZICBRD1G9F4COBQB1A3TKFSQ8M_123321_828325.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "Yes",
        "text": "the bowl of soup contains a large amount of broccoli"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000008211.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/16/m2rzsxg8nm0zzj-3MYYFCXHJ37ODPNW9CVMQKJMC1L4GM_60090_113332.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a yellow passenger train pulling up to a station"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000395343.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/9/m1fots9j4euzte-3IO1LGZLK9XNZXLS92CDV5M2LVQ68E_301135_505666.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a group of women sitting on a bench near a light pole"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000268378.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/20/m3722zayhgw89f-3Z4XG4ZF48R0IC5OAPL3VIZMIWSX8W_145597_437725.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "a group of woman sitting around at a picnic"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000165500.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/21/m3wmv085v9ad0l-3R08VXYT7CVHTYVFQFXBFZPK1ONW7H_214869_436874.wav"
        ],
        "question": "Are the audio and the image about the same thing?",
        "answer": "No",
        "text": "a woman sitting down with a large cell phone holder on her pants"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000140583.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/1/m12xyeia1z554i-3OONKJ5DKCJWQ1P3CAIH37YUEI6OBJ_333956_761814.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "a parking meter beside a walkway near the water"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000475678.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/8/m1bqqrrs1kx103-3COPXFW7XBCF445YXN8PRAMAB35KPI_494913_634847.wav"
        ],
        "question": "Is the audio related to the image?",
        "answer": "No",
        "text": "residential living room with fireplace and large television"
    },
    {
        "image": [
            "data/images/vqav2/val2014/COCO_val2014_000000290843.jpg",
            "spokenCOCO/SpokenCOCO/wavs/val/4/m19aricqa6ak6i-3PMBY0YE273CTJD3OLVUH19N1QIC96_47828_167746.wav"
        ],
        "question": "Is the audio describing the image?",
        "answer": "No",
        "text": "people walk by a boat near a lighted bridge"
    }
]
