[
    {
        "video": "attribute_1",
        "captions": {
            "2": "Three individuals dancing lively in front of a white pavilion.",
            "3": "Four individuals dancing lively in front of a white pavilion.",
            "1": "Two individuals dancing lively in front of a white pavilion."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_2",
        "captions": {
            "2": "Three orange trucks at a traffic light junction.",
            "3": "Four orange trucks at a traffic light junction.",
            "1": "Two orange trucks at a traffic light junction."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_3",
        "captions": {
            "2": "Five dogs, including two large grey Wolf dogs, a black and white dog, a white puppy, and a brown puppy, all on leashes.",
            "3": "Six dogs, including two large grey Wolf dogs, a black and white dog, two white puppies, and a brown puppy, all on leashes.",
            "1": "Four dogs, including two large grey Wolf dogs, a black and white dog and a brown puppy, all on leashes."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_4",
        "captions": {
            "1": "The video shows red, green, white, and blue just salad bowls.",
            "2": "The video shows red, green, white, blue, and yellow just salad bowls.",
            "3": "The video shows a rainbow of just salad bowls including red, green, white, blue, yellow, and purple."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_5",
        "captions": {
            "2": "A black and white dog jumps over a grey basket on a moving bicycle.",
            "3": "A black and white dog jumps out of a grey basket on a moving bicycle.",
            "1": "A black and white dog jumps into a grey basket on a moving bicycle."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_6",
        "captions": {
            "2": "Four individuals take turns at a climbing gym.",
            "3": "Five individuals take turns at a climbing gym.",
            "1": "Three individuals take turns at a climbing gym."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_7",
        "captions": {
            "2": "A black dog jumps from the front passenger seat to the backseat and stays there.",
            "3": "A black dog jumps from the backseat to the front passenger seat and stays there.",
            "1": "A black dog jumps from the front passenger seat to the backseat, and back to the front passenger seat."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_8",
        "captions": {
            "2": "The video shows a badminton game where the shuttlecock falls to the ground six times.",
            "3": "The video shows a badminton game where the shuttlecock falls to the ground seven times.",
            "1": "The video shows a badminton game where the shuttlecock falls to the ground five times."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_9",
        "captions": {
            "2": "An individual in a black graduation gown applauds 20 times on stage.",
            "3": "An individual in a black graduation gown applauds 25 times on stage.",
            "1": "An individual in a black graduation gown applauds 17 times on stage."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_10",
        "captions": {
            "2": "Two people pass a rugby football six times on a sunny day.",
            "3": "Two people pass a rugby football eight times on a sunny day.",
            "1": "Two people pass a rugby football five times on a sunny day."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_11",
        "captions": {
            "1": "A person in green performs five push-ups in a sports equipment-filled room.",
            "2": "A person in green performs six push-ups in a sports equipment-filled room.",
            "3": "A person in green performs eight push-ups in a sports equipment-filled room."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_12",
        "captions": {
            "2": "The individual throws an equal number of right-hand and left-hand punches while practicing boxing at home.",
            "3": "The individual throws more left-hand punches than right-hand punches while practicing boxing at home.",
            "1": "The individual throws more right-hand punches than left-hand punches while practicing boxing at home."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_13",
        "captions": {
            "2": "An individual in a white vest performs four sit-ups on a black mat in nine seconds.",
            "3": "An individual in a white vest performs seven sit-ups on a black mat in nine seconds.",
            "1": "An individual in a white vest performs three sit-ups on a black mat in nine seconds."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_14",
        "captions": {
            "2": "A mattress expands to a larger size after being unsealed and shrinks again shortly after.",
            "3": "A mattress shrinks to a smaller size after being unsealed and exposed to air.",
            "1": "A mattress expands to its original size after being unsealed and exposed to air."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "size"
    },
    {
        "video": "attribute_15",
        "captions": {
            "2": "The butter evaporates as the garlic is saut\u00e9ed.",
            "3": "The butter freezes as the garlic is saut\u00e9ed.",
            "1": "The butter melts as the garlic is saut\u00e9ed."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_16",
        "captions": {
            "2": "An orange chameleon changes color from dark red to green as it crawls forward on a tree.",
            "3": "An orange chameleon changes color from dark red to blue as it crawls backward on a tree.",
            "1": "An orange chameleon changes color from dark red to pink as it crawls forward on a tree."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_17",
        "captions": {
            "2": "Ice slowly sinks in water as it freezes inside a glass.",
            "3": "Ice slowly sinks in water as it expands inside a glass.",
            "1": "Ice slowly sinks in water as it melts inside a glass."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_18",
        "captions": {
            "2": "A man removes rust from metal strips using a solution, changing their color to black.",
            "3": "A man causes metal strips to rust using a solution, changing their color to brown.",
            "1": "A man removes rust from metal strips using a solution, changing their color to gray."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_19",
        "captions": {
            "2": "Heating sugar over a candle flame causes it to melt and change color from white to brown.",
            "3": "Heating sugar over a candle flame causes it to melt and change color from yellow to green.",
            "1": "Heating sugar over a candle flame causes it to melt and change color from white to yellow."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_20",
        "captions": {
            "2": "Clear liquid in test tube becomes cloudy and fizzes violently after carbon dioxide introduction.",
            "3": "Clear liquid in test tube becomes cloudy and still after carbon dioxide introduction.",
            "1": "Clear liquid in test tube becomes cloudy and bubbles after carbon dioxide introduction."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_21",
        "captions": {
            "2": "A person wearing an orange glove drops white objects into glasses of water, causing the water to turn a milky white color.",
            "3": "A person wearing an orange glove drops white objects into glasses of water, causing the water to turn purple in color.",
            "1": "A person wearing an orange glove drops white objects into glasses of water, causing the water to emit a white gas."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_22",
        "captions": {
            "2": "Super Mario climbs from last to second position, getting close to the princess in a hurdle race.",
            "3": "Super Mario drops from first to last position, falling behind the princess in a hurdle race.",
            "1": "Super Mario climbs from last to first position, overtaking the princess in a hurdle race."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_23",
        "captions": {
            "2": "A man in a white t-shirt and gray shorts lifts two black dumbbells five times each.",
            "3": "A man in a white t-shirt and gray shorts lifts two black dumbbells six times each.",
            "1": "A man in a white t-shirt and gray shorts lifts two black dumbbells four times each."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_24",
        "captions": {
            "2": "A white candle on a brown table extinguishes, leaving the wick glowing brightly for several seconds before emitting a thin trail of smoke.",
            "3": "A white candle on a brown table suddenly bursts into a large flame before quickly extinguishing, leaving a thin trail of smoke.",
            "1": "A white candle on a brown table extinguishes, leaving the wick glowing briefly before emitting a thin trail of smoke."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_25",
        "captions": {
            "2": "Blowing into a plastic container of turmeric-colored liquid turns it orange, then back to turmeric.",
            "3": "Blowing into a plastic container of turmeric-colored liquid turns it purple, then back to turmeric.",
            "1": "Blowing into a plastic container of turmeric-colored liquid turns it red, then back to turmeric."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_26",
        "captions": {
            "2": "Pouring blue liquid into water on a heated surface turns the combined liquid green.",
            "3": "Pouring red liquid into water on a heated surface turns the combined liquid green.",
            "1": "Pouring purple liquid into water on a heated surface turns the combined liquid green."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_27",
        "captions": {
            "2": "Blue litmus paper changes to red upon contact with the liquid in the second spoon from the left.",
            "3": "Blue litmus paper remains blue upon contact with the liquid in the second spoon from the left.",
            "1": "Blue litmus paper changes to orange upon contact with the liquid in the second spoon from the left."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_28",
        "captions": {
            "2": "A person drops iodine onto a potato, causing it to turn dark brown.",
            "3": "A person drops iodine onto a potato, causing it to turn yellow.",
            "1": "A person drops iodine onto a potato, causing it to turn black."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_29",
        "captions": {
            "2": "Video shows a transition from a lightning-filled cloudy sky to a rainstorm affecting a grassy area with high-velocity winds and two individuals battling the conditions.",
            "3": "Video shows a transition from a lightning-filled cloudy sky to a bright sunny day, with two individuals running across a field.",
            "1": "Video shows a transition from a lightning-filled cloudy sky to a powerful tornado affecting a grassy area with high-velocity winds and two individuals battling the conditions."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_30",
        "captions": {
            "2": "The short-haired individual appears curious and reflective after hearing the long-haired person speak.",
            "3": "The short-haired individual appears excited and joyful after hearing the long-haired person speak.",
            "1": "The short-haired individual appears helpless and unhappy after hearing the long-haired person speak."
        },
        "dataset": "autoeval_video",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_31",
        "captions": {
            "2": "A cube-shaped rubber object is moving.",
            "3": "A star-shaped rubber object is moving.",
            "1": "A cylinder-shaped rubber object is moving."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_32",
        "captions": {
            "2": "A stationary blue cube is visible at the start of the video.",
            "3": "A stationary blue pyramid is visible at the start of the video.",
            "1": "A stationary blue cylinder is visible at the start of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_33",
        "captions": {
            "2": "A stationary metal cube.",
            "3": "A stationary metal pyramid.",
            "1": "A stationary metal sphere."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_34",
        "captions": {
            "2": "A rectangular object exits the scene in the video.",
            "3": "A spiral-shaped object exits the scene in the video.",
            "1": "A cylindrical object exits the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_35",
        "captions": {
            "2": "A stationary conical rubber object.",
            "3": "A stationary star-shaped rubber object.",
            "1": "A stationary cylindrical rubber object."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_36",
        "captions": {
            "2": "A stationary red rubber object is featured at the beginning of the video.",
            "3": "A stationary black rubber object is featured at the beginning of the video.",
            "1": "A stationary brown rubber object is featured at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_37",
        "captions": {
            "2": "A brown object approaches a stationary cube.",
            "3": "A brown object approaches a stationary star-shaped prism.",
            "1": "A brown object approaches a stationary cylinder."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_38",
        "captions": {
            "2": "A cylindrical rubber object moves at the beginning of the video.",
            "3": "A pyramid-shaped rubber object moves at the beginning of the video.",
            "1": "A spherical rubber object moves at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_39",
        "captions": {
            "2": "A cone-shaped rubber object is moving at the end of the video.",
            "3": "A star-shaped rubber object is moving at the end of the video.",
            "1": "A cylinder-shaped rubber object is moving at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_40",
        "captions": {
            "2": "A moving rubber sphere is shown at the beginning of the video.",
            "3": "A moving rubber star is shown at the beginning of the video.",
            "1": "A moving rubber cube is shown at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_41",
        "captions": {
            "2": "A tan object exits the scene in the video.",
            "3": "A turquoise object exits the scene in the video.",
            "1": "A brown object exits the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_42",
        "captions": {
            "2": "A black object is the last to enter the scene in the video.",
            "3": "A purple object is the last to enter the scene in the video.",
            "1": "A brown object is the last to enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_43",
        "captions": {
            "2": "A stationary brown cube appears at the end of the video.",
            "3": "A stationary brown pyramid appears at the end of the video.",
            "1": "A stationary brown sphere appears at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_44",
        "captions": {
            "2": "A stationary blue metal object is shown at the start of the video.",
            "3": "A stationary red metal object is shown at the start of the video.",
            "1": "A stationary purple metal object is shown at the start of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_45",
        "captions": {
            "1": "A moving cube appears at the start of the video.",
            "2": "A moving sphere appears at the start of the video.",
            "3": "A moving star appears at the start of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_46",
        "captions": {
            "2": "A stationary rubber sphere at the beginning of the video.",
            "3": "A stationary rubber star at the beginning of the video.",
            "1": "A stationary rubber cube at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_47",
        "captions": {
            "2": "Black rubber object moves toward the video end.",
            "3": "Red rubber object moves toward the video end.",
            "1": "Gray rubber object moves toward the video end."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_48",
        "captions": {
            "2": "An orange rubber cylinder is moving.",
            "3": "A purple rubber cylinder is moving.",
            "1": "A yellow rubber cylinder is moving."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_49",
        "captions": {
            "2": "A yellow object enters the scene in the video where a blue object is stationary.",
            "3": "A yellow object enters the scene in the video where a green object is stationary.",
            "1": "A yellow object enters the scene in the video where a purple object is stationary."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_50",
        "captions": {
            "2": "An orange object is the last to enter the scene in the video.",
            "3": "A purple object is the last to enter the scene in the video.",
            "1": "A yellow object is the last to enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_51",
        "captions": {
            "2": "A rubber sphere remains stationary.",
            "3": "A rubber pyramid remains stationary.",
            "1": "A rubber cube remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_52",
        "captions": {
            "2": "A cube is the last object to exit the scene in the video.",
            "3": "A star is the last object to exit the scene in the video.",
            "1": "A sphere is the last object to exit the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_53",
        "captions": {
            "2": "A blue stationary cube is present at the beginning of the video.",
            "3": "A pink stationary cube is present at the beginning of the video.",
            "1": "A cyan stationary cube is present at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_54",
        "captions": {
            "2": "A rectangular object remains stationary at the end of the video.",
            "3": "A spherical object remains stationary at the end of the video.",
            "1": "A cylindrical object remains stationary at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_55",
        "captions": {
            "2": "A stationary cube appears at the end of the video.",
            "3": "A stationary pyramid appears at the end of the video.",
            "1": "A stationary sphere appears at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_56",
        "captions": {
            "2": "A stationary gray cylinder is seen at the beginning of the video.",
            "3": "A stationary red cylinder is seen at the beginning of the video.",
            "1": "A stationary brown cylinder is seen at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_57",
        "captions": {
            "2": "A stationary beige cylinder at the start of the video.",
            "3": "A stationary violet cylinder at the start of the video.",
            "1": "A stationary brown cylinder at the start of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_58",
        "captions": {
            "2": "A blue rubber object is moving.",
            "3": "A yellow rubber object is moving.",
            "1": "A purple rubber object is moving."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_59",
        "captions": {
            "2": "A stationary yellow sphere is shown.",
            "3": "A stationary yellow star is shown.",
            "1": "A stationary yellow cube is shown."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_60",
        "captions": {
            "2": "Three moving cylinders are present in the video.",
            "3": "Five moving cylinders are present in the video.",
            "1": "Two moving cylinders are present in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_61",
        "captions": {
            "1": "Stationary cylinder with a moving purple object entering the scene in the video.",
            "2": "Stationary cone with a moving purple object entering the scene in the video.",
            "3": "Stationary cube with a moving purple object entering the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_62",
        "captions": {
            "2": "A yellow sphere is moving at the end of the video.",
            "3": "A yellow star is moving at the end of the video.",
            "1": "A yellow cube is moving at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_63",
        "captions": {
            "2": "An oval rubber object is moving at the beginning of the video.",
            "3": "A spherical rubber object is moving at the beginning of the video.",
            "1": "A cylindrical rubber object is moving at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_64",
        "captions": {
            "2": "A cylinder is moving when the video begins.",
            "3": "A pyramid is moving when the video begins.",
            "1": "A cube is moving when the video begins."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_65",
        "captions": {
            "2": "The stationary metal object is orange at the end of the video.",
            "3": "The stationary metal object is pink at the end of the video.",
            "1": "The stationary metal object is yellow at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_66",
        "captions": {
            "2": "A grey object enters the scene in the video last.",
            "3": "A turquoise object enters the scene in the video last.",
            "1": "A brown object enters the scene in the video last."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_67",
        "captions": {
            "2": "The video features a stationary rectangular rubber object.",
            "3": "The video features a stationary star-shaped rubber object.",
            "1": "The video features a stationary cylindrical rubber object."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_68",
        "captions": {
            "2": "A stationary spherical rubber object.",
            "3": "A stationary star-shaped rubber object.",
            "1": "A stationary cylindrical rubber object."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_69",
        "captions": {
            "2": "A stationary cylindrical wooden object.",
            "3": "A stationary hexagonal metal object.",
            "1": "A stationary cylindrical metal object."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_70",
        "captions": {
            "2": "A stationary black metal object is present at the beginning of the video.",
            "3": "A stationary pink metal object is present at the beginning of the video.",
            "1": "A stationary brown metal object is present at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_71",
        "captions": {
            "2": "A stationary green cone at the end of the video.",
            "3": "A stationary green star at the end of the video.",
            "1": "A stationary green cylinder at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_72",
        "captions": {
            "2": "A cone enters the scene in the video where a stationary cube is present.",
            "3": "A star enters the scene in the video where a stationary cube is present.",
            "1": "A cylinder enters the scene in the video where a stationary cube is present."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_73",
        "captions": {
            "1": "A stationary cylinder is present when the video begins.",
            "2": "A stationary cone is present when the video begins.",
            "3": "A stationary star is present when the video begins."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_74",
        "captions": {
            "2": "A blue stationary metal cube is shown at the beginning.",
            "3": "A red stationary metal cube is shown at the beginning.",
            "1": "A green stationary metal cube is shown at the beginning."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_75",
        "captions": {
            "2": "In the video, there is a stationary purple sphere.",
            "3": "In the video, there is a stationary purple pyramid.",
            "1": "In the video, there is a stationary purple cube."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_76",
        "captions": {
            "2": "A stationary metal object in the shape of a sphere.",
            "3": "A stationary metal object in the shape of a star.",
            "1": "A stationary metal object in the shape of a cube."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_77",
        "captions": {
            "2": "A metal sphere is moving at the end of the video.",
            "3": "A metal pyramid is moving at the end of the video.",
            "1": "A metal cube is moving at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_78",
        "captions": {
            "2": "A pyramid-shaped metal object is moving as the video ends.",
            "3": "A star-shaped metal object is moving as the video ends.",
            "1": "A cube-shaped metal object is moving as the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_79",
        "captions": {
            "2": "A stationary metal cube appears when the video begins.",
            "3": "A stationary metal pyramid appears when the video begins.",
            "1": "A stationary metal sphere appears when the video begins."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_80",
        "captions": {
            "2": "A stationary rectangular object is present.",
            "3": "A stationary triangular object is present.",
            "1": "A stationary cylindrical object is present."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_81",
        "captions": {
            "2": "The video ends with a moving blue cube.",
            "3": "The video ends with a moving yellow cube.",
            "1": "The video ends with a moving purple cube."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_82",
        "captions": {
            "2": "There is a stationary cube when the video ends.",
            "3": "There is a stationary pyramid when the video ends.",
            "1": "There is a stationary cylinder when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_83",
        "captions": {
            "2": "The second object that enters the scene in the video is dark red.",
            "3": "The second object that enters the scene in the video is purple.",
            "1": "The second object that enters the scene in the video is brown."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_84",
        "captions": {
            "2": "A black rubber sphere is moving.",
            "3": "A red rubber sphere is moving.",
            "1": "A gray rubber sphere is moving."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_85",
        "captions": {
            "2": "A blue object enters the scene in the video.",
            "3": "A magenta object enters the scene in the video.",
            "1": "A cyan object enters the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_86",
        "captions": {
            "2": "A cube-shaped metal object is moving at the end of the video.",
            "3": "A star-shaped metal object is moving at the end of the video.",
            "1": "A sphere-shaped metal object is moving at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_87",
        "captions": {
            "2": "Five cubes are moving at the end of the video.",
            "3": "Seven cubes are moving at the end of the video.",
            "1": "Four cubes are moving at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_88",
        "captions": {
            "2": "A stationary metal sphere is shown when the video begins.",
            "3": "A stationary metal pyramid is shown when the video begins.",
            "1": "A stationary metal cube is shown when the video begins."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_89",
        "captions": {
            "2": "A rectangular object enters the scene in the video.",
            "3": "A hexagonal object enters the scene in the video.",
            "1": "A cylindrical object enters the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_90",
        "captions": {
            "1": "A cube-shaped metal object moves at the beginning of the video.",
            "2": "A rectangular-shaped metal object moves at the beginning of the video.",
            "3": "A sphere-shaped metal object moves at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_91",
        "captions": {
            "2": "A stationary rubber cylinder is shown at the end of the video.",
            "3": "A stationary rubber star is shown at the end of the video.",
            "1": "A stationary rubber cube is shown at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_92",
        "captions": {
            "2": "A stationary cone is seen when the video ends.",
            "3": "A stationary pyramid is seen when the video ends.",
            "1": "A stationary cylinder is seen when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_93",
        "captions": {
            "2": "A rectangular rubber object moves at the beginning of the video.",
            "3": "A star-shaped rubber object moves at the beginning of the video.",
            "1": "A cylindrical rubber object moves at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_94",
        "captions": {
            "2": "The video ends with a red stationary object.",
            "3": "The video ends with a purple stationary object.",
            "1": "The video ends with a yellow stationary object."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_95",
        "captions": {
            "2": "A stationary sphere is shown at the beginning of the video.",
            "3": "A stationary pyramid is shown at the beginning of the video.",
            "1": "A stationary cube is shown at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_96",
        "captions": {
            "2": "A stationary spherical rubber object at the beginning.",
            "3": "A stationary star-shaped rubber object at the beginning.",
            "1": "A stationary cylindrical rubber object at the beginning."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_97",
        "captions": {
            "2": "A black rubber object moves at the start of the video.",
            "3": "A pink rubber object moves at the start of the video.",
            "1": "A brown rubber object moves at the start of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_98",
        "captions": {
            "2": "A black stationary metal object is shown at the beginning of the video.",
            "3": "A yellow stationary metal object is shown at the beginning of the video.",
            "1": "A gray stationary metal object is shown at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_99",
        "captions": {
            "2": "A stationary rubber cube is shown at the beginning.",
            "3": "A stationary rubber pyramid is shown at the beginning.",
            "1": "A stationary rubber sphere is shown at the beginning."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_100",
        "captions": {
            "2": "A stationary cubical rubber object at the beginning of the video.",
            "3": "A stationary pyramid-shaped rubber object at the beginning of the video.",
            "1": "A stationary spherical rubber object at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_101",
        "captions": {
            "2": "A stationary rectangular object is shown in the video.",
            "3": "A stationary star-shaped object is shown in the video.",
            "1": "A stationary cylindrical object is shown in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "shape"
    },
    {
        "video": "attribute_102",
        "captions": {
            "2": "The video ends with a blue stationary object.",
            "3": "The video ends with a red stationary object.",
            "1": "The video ends with a green stationary object."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_103",
        "captions": {
            "2": "A dark brown object is the second to enter the scene in the video.",
            "3": "A bright yellow object is the second to enter the scene in the video.",
            "1": "A brown object is the second to enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_104",
        "captions": {
            "1": "One metal object enters the scene in the video.",
            "2": "Two metal objects enter the scene in the video.",
            "3": "Three metal objects enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_105",
        "captions": {
            "2": "Two metal objects enter the scene in the video.",
            "3": "Three metal objects enter the scene in the video.",
            "1": "A single metal object enters the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_106",
        "captions": {
            "2": "Three gray objects are moving when the video ends.",
            "3": "Four gray objects are moving when the video ends.",
            "1": "Two gray objects are moving when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_107",
        "captions": {
            "2": "There are two stationary blue objects in the video.",
            "3": "There are three stationary blue objects in the video.",
            "1": "There is one stationary blue object in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_108",
        "captions": {
            "2": "Four objects are moving.",
            "3": "Seven objects are moving.",
            "1": "Three objects are moving."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_109",
        "captions": {
            "2": "The video ends with 4 moving metal objects.",
            "3": "The video ends with 5 moving metal objects.",
            "1": "The video ends with 3 moving metal objects."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_110",
        "captions": {
            "2": "Three rubber objects enter the scene in the video.",
            "3": "Five rubber objects enter the scene in the video.",
            "1": "Two rubber objects enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_111",
        "captions": {
            "2": "Four collisions occur in the video.",
            "3": "Six collisions occur in the video.",
            "1": "Three collisions occur in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_112",
        "captions": {
            "2": "Four collisions occur in the video.",
            "3": "Seven collisions occur in the video.",
            "1": "Three collisions occur in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_113",
        "captions": {
            "2": "The video ends with 3 stationary metal objects.",
            "3": "The video ends with 5 stationary metal objects.",
            "1": "The video ends with 2 stationary metal objects."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_114",
        "captions": {
            "2": "There are three stationary purple objects in the video.",
            "3": "There are five stationary purple objects in the video.",
            "1": "There are two stationary purple objects in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_115",
        "captions": {
            "2": "Four objects are moving at the end of the video.",
            "3": "Six objects are moving at the end of the video.",
            "1": "Three objects are moving at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_116",
        "captions": {
            "1": "Three moving objects are present when the video concludes.",
            "2": "Four moving objects are present when the video concludes.",
            "3": "Five moving objects are present when the video concludes."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_117",
        "captions": {
            "2": "Four metal spheres enter the scene in the video.",
            "3": "Five metal spheres enter the scene in the video.",
            "1": "Three metal spheres enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_118",
        "captions": {
            "2": "Three metal cubes enter the scene in the video.",
            "3": "Four metal cubes enter the scene in the video.",
            "1": "Two metal cubes enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_119",
        "captions": {
            "2": "Four moving red objects are present at the end of the video.",
            "3": "Five moving red objects are present at the end of the video.",
            "1": "Three moving red objects are present at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_120",
        "captions": {
            "2": "Four moving red objects are visible when the video ends.",
            "3": "Five moving red objects are visible when the video ends.",
            "1": "Three moving red objects are visible when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_121",
        "captions": {
            "2": "Four objects enter the scene in the video.",
            "3": "Ten objects enter the scene in the video.",
            "1": "Three objects enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_122",
        "captions": {
            "2": "Five stationary rubber objects are present at the beginning of the video.",
            "3": "Seven stationary rubber objects are present at the beginning of the video.",
            "1": "Four stationary rubber objects are present at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_123",
        "captions": {
            "2": "Three stationary cylinders are present at the beginning of the video.",
            "3": "Five stationary cylinders are present at the beginning of the video.",
            "1": "Two stationary cylinders are present at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_124",
        "captions": {
            "2": "Four stationary rubber objects when the video begins.",
            "3": "Five stationary rubber objects when the video begins.",
            "1": "Three stationary rubber objects when the video begins."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_125",
        "captions": {
            "2": "Four rubber objects are moving.",
            "3": "Ten rubber objects are moving.",
            "1": "Three rubber objects are moving."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_126",
        "captions": {
            "2": "Four stationary objects are present at the start of the video.",
            "3": "Five stationary objects are present at the start of the video.",
            "1": "Three stationary objects are present at the start of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_127",
        "captions": {
            "2": "Most objects are in motion at the end of the video.",
            "3": "A few objects are in motion at the end of the video.",
            "1": "All objects are in motion at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_128",
        "captions": {
            "2": "Three rubber spheres enter the scene in the video.",
            "3": "Five rubber spheres enter the scene in the video.",
            "1": "Two rubber spheres enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_129",
        "captions": {
            "1": "Four moving objects are present at the end of the video.",
            "2": "Five moving objects are present at the end of the video.",
            "3": "Seven moving objects are present at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_130",
        "captions": {
            "2": "Five moving objects are present when the video ends.",
            "3": "Seven moving objects are present when the video ends.",
            "1": "Four moving objects are present when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_131",
        "captions": {
            "2": "Three cylinders exit the scene in the video.",
            "3": "Four cylinders exit the scene in the video.",
            "1": "Two cylinders exit the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_132",
        "captions": {
            "2": "There are five different moving objects in the video.",
            "3": "There are seven different moving objects in the video.",
            "1": "There are four different moving objects in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_133",
        "captions": {
            "2": "Five separate objects can be observed in motion.",
            "3": "Six separate objects can be observed in motion.",
            "1": "Four separate objects can be observed in motion."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_134",
        "captions": {
            "2": "Three objects are observed entering the scene in the video.",
            "3": "Four objects are observed entering the scene in the video.",
            "1": "Two objects are observed entering the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_135",
        "captions": {
            "2": "Three objects can be observed entering the scene in the video.",
            "3": "Five objects can be observed entering the scene in the video.",
            "1": "Two objects can be observed entering the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_136",
        "captions": {
            "2": "Five objects are moving when the video ends.",
            "3": "Seven objects are moving when the video ends.",
            "1": "Four objects are moving when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_137",
        "captions": {
            "2": "Five objects are moving when the video ends.",
            "3": "Six objects are moving when the video ends.",
            "1": "Four objects are moving when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_138",
        "captions": {
            "2": "Five objects are moving at the end of the video.",
            "3": "Seven objects are moving at the end of the video.",
            "1": "Four objects are moving at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_139",
        "captions": {
            "2": "Three brown objects enter the scene in the video.",
            "3": "Four brown objects enter the scene in the video.",
            "1": "Two brown objects enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_140",
        "captions": {
            "2": "Four metal objects enter the scene in the video.",
            "3": "Six metal objects enter the scene in the video.",
            "1": "Three metal objects enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_141",
        "captions": {
            "2": "Four metal objects enter the scene in the video.",
            "3": "Five metal objects enter the scene in the video.",
            "1": "Three metal objects enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_142",
        "captions": {
            "2": "Two objects are moving at the end of the video.",
            "3": "Three objects are moving at the end of the video.",
            "1": "One object is moving at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_143",
        "captions": {
            "2": "The video shows five moving rubber objects.",
            "3": "The video shows seven moving rubber objects.",
            "1": "The video shows four moving rubber objects."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_144",
        "captions": {
            "2": "Two stationary purple objects.",
            "3": "Three stationary purple objects.",
            "1": "One stationary purple object."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_145",
        "captions": {
            "2": "There are 5 moving metal objects.",
            "3": "There are 10 moving metal objects.",
            "1": "There are 4 moving metal objects."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_146",
        "captions": {
            "2": "There are 5 moving metal objects in the video.",
            "3": "There are 6 moving metal objects in the video.",
            "1": "There are 4 moving metal objects in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_147",
        "captions": {
            "2": "Five moving metal objects are present.",
            "3": "Ten moving metal objects are present.",
            "1": "Four moving metal objects are present."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_148",
        "captions": {
            "2": "Five objects are stationary at the beginning of the video.",
            "3": "Seven objects are stationary at the beginning of the video.",
            "1": "Four objects are stationary at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_149",
        "captions": {
            "2": "Five stationary objects are visible when the video begins.",
            "3": "Six stationary objects are visible when the video begins.",
            "1": "Four stationary objects are visible when the video begins."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_150",
        "captions": {
            "2": "Three stationary objects are shown at the beginning of the video.",
            "3": "Five stationary objects are shown at the beginning of the video.",
            "1": "Two stationary objects are shown at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_151",
        "captions": {
            "2": "Three stationary objects are present at the beginning of the video.",
            "3": "Four stationary objects are present at the beginning of the video.",
            "1": "Two stationary objects are present at the beginning of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_152",
        "captions": {
            "2": "Three moving red objects are seen in the video.",
            "3": "Five moving red objects are seen in the video.",
            "1": "Two moving red objects are seen in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_153",
        "captions": {
            "2": "Two metal objects are moving.",
            "3": "Six metal objects are moving.",
            "1": "A single metal object is moving."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_154",
        "captions": {
            "2": "Video ends with 5 stationary objects.",
            "3": "Video ends with 6 stationary objects.",
            "1": "Video ends with 4 stationary objects."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_155",
        "captions": {
            "2": "Five stationary objects are present when the video ends.",
            "3": "Seven stationary objects are present when the video ends.",
            "1": "Four stationary objects are present when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_156",
        "captions": {
            "2": "Five stationary objects at the end of the video.",
            "3": "Six stationary objects at the end of the video.",
            "1": "Four stationary objects at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_157",
        "captions": {
            "2": "Four objects are moving when the video ends.",
            "3": "Seven objects are moving when the video ends.",
            "1": "Three objects are moving when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_158",
        "captions": {
            "2": "Three blue objects enter the scene in the video.",
            "3": "Five blue objects enter the scene in the video.",
            "1": "Two blue objects enter the scene in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_159",
        "captions": {
            "2": "Five moving metal objects are present at the end of the video.",
            "3": "Seven moving metal objects are present at the end of the video.",
            "1": "Four moving metal objects are present at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_160",
        "captions": {
            "2": "Three yellow objects are moving at the end of the video.",
            "3": "Five yellow objects are moving at the end of the video.",
            "1": "Two yellow objects are moving at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_161",
        "captions": {
            "2": "The video begins with 3 moving yellow objects.",
            "3": "The video begins with 5 moving yellow objects.",
            "1": "The video begins with 2 moving yellow objects."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_162",
        "captions": {
            "2": "Four stationary objects are present when the video begins.",
            "3": "Five stationary objects are present when the video begins.",
            "1": "Three stationary objects are present when the video begins."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_163",
        "captions": {
            "2": "Three moving objects are present when the video ends.",
            "3": "Five moving objects are present when the video ends.",
            "1": "Two moving objects are present when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_164",
        "captions": {
            "2": "Three moving objects are present at the end of the video.",
            "3": "Five moving objects are present at the end of the video.",
            "1": "Two moving objects are present at the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_165",
        "captions": {
            "2": "Most spheres have stopped moving by the end of the video.",
            "3": "A few spheres have stopped moving by the end of the video.",
            "1": "All spheres have stopped moving by the end of the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_166",
        "captions": {
            "2": "Five collisions occur in the video.",
            "3": "Seven collisions occur in the video.",
            "1": "Four collisions occur in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_167",
        "captions": {
            "2": "Four rubber objects are moving when the video ends.",
            "3": "Five rubber objects are moving when the video ends.",
            "1": "Three rubber objects are moving when the video ends."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_168",
        "captions": {
            "2": "Five moving objects are present in the video.",
            "3": "Eight moving objects are present in the video.",
            "1": "Four moving objects are present in the video."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_169",
        "captions": {
            "2": "The video begins with 3 stationary rubber objects.",
            "3": "The video begins with 5 stationary rubber objects.",
            "1": "The video begins with 2 stationary rubber objects."
        },
        "dataset": "mvbench",
        "aspect": "attribute",
        "subaspect" : "count"
    },
    {
        "video": "attribute_170",
        "captions": {
            "2": "The eye is opening from a half-open state.",
            "3": "The eye is closing from an open state.",
            "1": "The eye is opening from a closed state."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_171",
        "captions": {
            "2": "The ice cream is evaporating into gas.",
            "3": "The ice cream is freezing and becoming solid.",
            "1": "The ice cream is melting and turning into liquid."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_172",
        "captions": {
            "2": "The traffic lights are changing from red to yellow.",
            "3": "The traffic lights are changing from green to red.",
            "1": "The traffic lights are changing from red to green."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_173",
        "captions": {
            "2": "The concrete wall has a few cracks forming gradually.",
            "3": "The concrete wall is mysteriously regenerating itself.",
            "1": "The concrete wall is falling apart."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_174",
        "captions": {
            "2": "Flowers slightly decaying from full blossom to scattered petals.",
            "3": "Flowers regrowing from withered to full blossom.",
            "1": "Flowers decaying from full blossom to withered."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_175",
        "captions": {
            "2": "Bronze particles are exploding.",
            "3": "Silver particles are exploding.",
            "1": "Golden particles are exploding."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_176",
        "captions": {
            "2": "The light bulb flickers then lights up.",
            "3": "The light bulb dims to a faint glow.",
            "1": "The light bulb lights up."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_177",
        "captions": {
            "2": "A digital bar chart shows a decreasing trend, followed by an increasing trend.",
            "3": "A digital bar chart shows a decreasing trend.",
            "1": "A digital bar chart shows an increasing trend ."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_178",
        "captions": {
            "2": "An apple is shrinking and turning red.",
            "3": "An apple is expanding and becoming juicier.",
            "1": "An apple is shrinking and drying out."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "size"
    },
    {
        "video": "attribute_179",
        "captions": {
            "2": "The pile of salt is growing smaller.",
            "3": "The pile of salt does not change in size.",
            "1": "The pile of salt is growing bigger."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "size"
    },
    {
        "video": "attribute_180",
        "captions": {
            "2": "3D shapes transforming from cubes into pyramids.",
            "3": "3D shapes transforming from spheres into cubes.",
            "1": "3D shapes transforming from cubes into spheres."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_181",
        "captions": {
            "2": "A green leaf is gradually turning brown.",
            "3": "A green leaf shifts from yellow to vibrant red.",
            "1": "A green leaf is gradually turning yellow."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_182",
        "captions": {
            "2": "Pink flowers transforming from withered to partly bloomed.",
            "3": "Pink flowers transforming from fully bloomed to withered.",
            "1": "Pink flowers transforming from withered to fully bloomed."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_183",
        "captions": {
            "2": "The lighting on the football players briefly dims before becoming brighter and making them more visible.",
            "3": "The lighting on the football players gradually dims and making them less visible",
            "1": "The lighting on the football players is becoming brighter and making them more visible."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_184",
        "captions": {
            "2": "The fireball is expanding but starts to shrink slightly.",
            "3": "The fireball is shrinking and becomes smaller.",
            "1": "The fireball is expanding and growing larger."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "size"
    },
    {
        "video": "attribute_185",
        "captions": {
            "2": "The glass is being filled with water until it is completely full.",
            "3": "The glass is being filled with water until it overflows.",
            "1": "The glass is being filled with water until it is about 70% full."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_186",
        "captions": {
            "2": "Yellow foam appears on the surface of the pink liquid.",
            "3": "Green foam appears on the surface of the pink liquid.",
            "1": "White foam appears on the surface of the pink liquid."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_187",
        "captions": {
            "1": "A skyscraper is collapsing.",
            "2": "A skyscraper is tilting to one side.",
            "3": "A skyscraper is growing taller."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_188",
        "captions": {
            "2": "The coronavirus weakens slightly under the attack of antibodies.",
            "3": "The coronavirus multiplies despite the attack of antibodies.",
            "1": "The coronavirus dissolves under the attack of antibodies."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_189",
        "captions": {
            "1": "The battery charge is increasing.",
            "2": "The battery charge is increasing slowly after a brief drop.",
            "3": "The battery charge is quickly decreasing."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_190",
        "captions": {
            "2": "The number of fruits and vegetables increases before slowly disappearing.",
            "3": "The number of fruits and vegetables is decreasing.",
            "1": "The number of fruits and vegetables is increasing."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_191",
        "captions": {
            "2": "Wrinkled paper ball unfolding into a flattened sheet, then being crumpled again.",
            "3": "A flattened sheet being crumpled into a wrinkled paper ball.",
            "1": "Wrinkled paper ball unfolding into a flattened sheet."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_192",
        "captions": {
            "1": "The sphere building is changing from green to blue.",
            "2": "The sphere building is changing from green to red.",
            "3": "The sphere building is changing from blue to green."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_193",
        "captions": {
            "2": "A building is being constructed and growing wider.",
            "3": "A building is being constructed, then starts shrinking.",
            "1": "A building is being constructed and growing taller."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "size"
    },
    {
        "video": "attribute_194",
        "captions": {
            "1": "3D pixels transform from a cuboid into the shape of a man.",
            "2": "3D pixels transform from a cuboid into the shape of a dog.",
            "3": "3D pixels transform from a cuboid into the shape of a tree."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_195",
        "captions": {
            "2": "The planet Earth is disassembling into toy bricks.",
            "3": "The planet Earth is transforming into a toy brick.",
            "1": "The planet Earth is assembling from toy bricks."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_196",
        "captions": {
            "2": "Glacier rapidly melting and disappearing into water.",
            "3": "Glacier slowly forming from still water.",
            "1": "Glacier breaking and falling into water."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_197",
        "captions": {
            "1": "The sport stadium is turning brighter.",
            "2": "The sport stadium turns bright, then dims slightly.",
            "3": "The sport stadium is turning darker."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_198",
        "captions": {
            "2": "Black ink is contracting towards the center.",
            "3": "Black ink is swirling around the center.",
            "1": "Black ink is expanding from the center."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "size"
    },
    {
        "video": "attribute_199",
        "captions": {
            "2": "A 3D house model is being constructed and then taken apart.",
            "3": "A 3D house model is being taken apart.",
            "1": "A 3D house model is being constructed."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_200",
        "captions": {
            "2": "A face morphs from a chimpanzee into another animal.",
            "3": "A face morphs from a human into a chimpanzee.",
            "1": "A face morphs from a chimpanzee into a human."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_201",
        "captions": {
            "2": "The background changes from pink to green.",
            "3": "The background changes from blue to pink.",
            "1": "The background changes from pink to blue."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "color"
    },
    {
        "video": "attribute_202",
        "captions": {
            "2": "An insect is transforming from larva into a butterfly.",
            "3": "An insect is transforming from butterfly into a pupa.",
            "1": "An insect is transforming from pupa into a butterfly."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_203",
        "captions": {
            "2": "A 3D pixel transforms from a white square into a butterfly.",
            "3": "A 3D pixel transforms from a white square into a bird.",
            "1": "A 3D pixel transforms from a white square into a flower."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_204",
        "captions": {
            "2": "A 3D heart gradually appears and then disappears.",
            "3": "A 3D heart gradually disappears.",
            "1": "A 3D heart gradually appears."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "attribute_205",
        "captions": {
            "2": "The room is gradually becoming dimmer and then brightens again.",
            "3": "The room is gradually becoming brighter.",
            "1": "The room is gradually becoming darker."
        },
        "dataset": "tempcompass",
        "aspect": "attribute",
        "subaspect" : "state_change"
    },
    {
        "video": "order_1",
        "captions": {
            "2": "The letters displayed at the end of the video are e, m, n.",
            "3": "The letters displayed at the end of the video are n, m, e.",
            "1": "The letters displayed at the end of the video are m, e, n."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_2",
        "captions": {
            "2": "The letters at the end are m, e, r.",
            "3": "The letters at the end are r, e, m.",
            "1": "The letters at the end are e, m, r."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_3",
        "captions": {
            "2": "The letters appear in the order: e, r, m at the end.",
            "3": "The letters appear in the order: r, m, e at the end.",
            "1": "The letters appear in the order: e, m, r at the end."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_4",
        "captions": {
            "2": "Letters e, a, and c appear at the end.",
            "3": "Letters e, c and a appear at the end.",
            "1": "Letters a, c, and e appear at the end."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_5",
        "captions": {
            "2": "The letters a, o, m, r appear at the end.",
            "3": "The letters r, m, o, a appear at the end.",
            "1": "The letters a, m, o, r appear at the end."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_6",
        "captions": {
            "2": "The order of the letters at the end is j, b, x, d.",
            "3": "The order of the letters at the end is b, d, j, x.",
            "1": "The order of the letters at the end is j, x, b, d."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_7",
        "captions": {
            "2": "The video ends with the letters in the sequence: x, d, b, m.",
            "3": "The video ends with the letters in the sequence: m, x, b, d.",
            "1": "Caption: The video ends with the letters in the sequence: x, b, d, m."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_8",
        "captions": {
            "2": "The letters appear in the order: e, m, n at the end.",
            "3": "The letters appear in the order: m, n, e at the end.",
            "1": "The letters appear in the order: e, n, m at the end."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_9",
        "captions": {
            "2": "The letters at the end are c, h, and a.",
            "3": "The letters at the end are h, a, and c.",
            "1": "The letters at the end are c, a, and h."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_10",
        "captions": {
            "2": "The letters at the end are e, r, w.",
            "3": "The letters at the end are w, r, e.",
            "1": "The letters at the end are e, w, r."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_11",
        "captions": {
            "2": "The order of the letters at the end is j, y, o.",
            "3": "The order of the letters at the end is y, o, j.",
            "1": "The order of the letters at the end is j, o, y."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_12",
        "captions": {
            "2": "Letters appear in the order: j, y, o.",
            "3": "Letters appear in the order: y, o, j.",
            "1": "Letters appear in the order: j, o, y."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_13",
        "captions": {
            "2": "The video ends with the letters in the order: o, p, s.",
            "3": "The video ends with the letters in the order: p, s, o.",
            "1": "The video ends with the letters in the order: o, s, p."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_14",
        "captions": {
            "2": "Letters appear in the order: a, c, b at the end.",
            "3": "Letters appear in the order: b, a c at the end.",
            "1": "Letters appear in the order: c, a, b at the end."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_15",
        "captions": {
            "2": "The person claps twice, once, and then three times.",
            "3": "The person claps three times, once, and then twice.",
            "1": "The person claps once, twice, and then three times."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_16",
        "captions": {
            "2": "The person claps twice, then once, and finally three times.",
            "3": "The person claps three times, then twice, and finally once.",
            "1": "The person claps once, then twice, and finally three times."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_17",
        "captions": {
            "2": "The person claps once, claps three times, and then pretends to clap twice.",
            "3": "The person claps three times, pretends to clap twice, and then claps once.",
            "1": "The person claps once, pretends to clap twice, and claps three times."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_18",
        "captions": {
            "2": "The person pretends to clap twice, claps once, then claps three times.",
            "3": "The person claps three times, pretends to clap twice, then claps once.",
            "1": "The person claps once, pretends to clap twice, then claps three times."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_19",
        "captions": {
            "2": "A person claps once, then claps three times, and finally pretends to clap twice.",
            "3": "A person pretends to clap twice, claps once, then claps three times.",
            "1": "A person claps once, pretends to clap twice, then claps three times."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_20",
        "captions": {
            "2": "The person pretends to clap once, claps once, claps twice, pretends to clap twice, and claps three times.",
            "3": "The person claps twice, claps once, pretends to clap twice, pretends to clap once, and claps three times.",
            "1": "The person claps once, pretends to clap once, claps twice, pretends to clap twice, and claps three times."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_21",
        "captions": {
            "2": "The person puts a notebook, one pen and two notebooks in the backpack in that order.",
            "3": "The person puts one pen and three notebooks in the backpack in that order.",
            "1": "The person puts three notebooks and one pen in the backpack in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_22",
        "captions": {
            "2": "The person puts two notebooks into the backpack, followed by one pen and then one notebook.",
            "3": "The person puts one pen into the backpack, followed by three notebooks.",
            "1": "The person puts three notebooks followed by one pen into the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_23",
        "captions": {
            "2": "The person puts a pen, a book, and another pen in the backpack.",
            "3": "The person puts a pen, another pen, and a book in the backpack.",
            "1": "The person puts a book, a pen, and another pen in the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_24",
        "captions": {
            "2": "The person puts a pen in the backpack first, a book, and another person inside the backpack.",
            "3": "The person puts two pens and a book in the backpack in that order.",
            "1": "The person puts a book and two pens in the backpack in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_25",
        "captions": {
            "2": "The person puts the bottle, wallet, and black-box in the backpack.",
            "3": "The person puts the black-box, bottle, and wallet in the backpack.",
            "1": "The person puts the wallet, bottle, and black-box in the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_26",
        "captions": {
            "2": "Person packs bottle, wallet, and black-box into backpack.",
            "3": "Person packs black-box, wallet, and bottle into backpack.",
            "1": "Person packs wallet, bottle, and black-box into backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_27",
        "captions": {
            "2": "The person puts the laptop, pouch, book, and another book in the backpack.",
            "3": "The person puts the book, laptop, another book, and the pouch in the backpack.",
            "1": "The person puts the pouch, laptop, book, and another book in the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_28",
        "captions": {
            "2": "Person packs a backpack in the order: book, pen, t-shirt, shorts, laptop.",
            "3": "Person packs a backpack in the order: book, pen, shorts, t-shirt, laptop.",
            "1": "Person packs a backpack in the order: pen, book, t-shirt, shorts, laptop."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_29",
        "captions": {
            "2": "Person packing a backpack in the order of bottle, charger, jar, remote-control, spoon and fork, and pen.",
            "3": "Person packing a backpack in the order of pen, jar, remote-control, spoon and fork, charger, and bottle.",
            "1": "Person packing a backpack in the order of bottle, jar, charger, remote-control, spoon and fork, and pen."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_30",
        "captions": {
            "2": "Person packing a backpack with a mobile-phone, pen, goggles, book, and book in that order.",
            "3": "Person packing a backpack with a pen, mobile-phone, book, goggles, and book in that order.",
            "1": "Person packing a backpack with a mobile-phone, pen, book, book, and goggles in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_31",
        "captions": {
            "2": "Person packs the backpack with a book, some pens, a box, and a t-shirt in that order.",
            "3": "Person packs the backpack with a t-shirt, some pens, a book, and a box in that order.",
            "1": "Person packs the backpack with a book, a box, some pens, and a t-shirt in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_32",
        "captions": {
            "2": "Person packs a book, a power-bank and then another book into a backpack.",
            "3": "Person packs  a power-bank and then two books and into a backpack",
            "1": "Person packs two books and then a power-bank into a backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_33",
        "captions": {
            "2": "Person packs a book, bottle and then watch into a backpack.",
            "3": "Person packs a watch, bottle and then book into a backpack.",
            "1": "Person packs a bottle, book, and then watch into a backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_34",
        "captions": {
            "2": "The person puts a pen and book into the backpack at the same time.",
            "3": "The person puts a pen in the backpack and then puts a book in..",
            "1": "The person puts a book and then a pen in the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_35",
        "captions": {
            "2": "Person puts a pen and a book into the backpack at the same time.",
            "3": "Person puts a pen and then a book into the backpack.",
            "1": "Person puts a book and then a pen into the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_36",
        "captions": {
            "2": "Person puts a cloth, banana, and pencil in a backpack.",
            "3": "Person puts a pencil in a backpack, then a clothe, and lastly a banana.",
            "1": "Person puts a banana, cloth, and pencil in a backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_37",
        "captions": {
            "2": "A person places two books, two pens, a tablet, and a cloth into a backpack in that order.",
            "3": "A person places a cloth, a tablet, two books and two pens into a backpack in that order.",
            "1": "A person places two books, a tablet, two pens, and a cloth into a backpack in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_38",
        "captions": {
            "2": "The person puts a top, two books, and pants in the backpack in that order.",
            "3": "The person puts two books, pants, and a top in the backpack in that order.",
            "1": "The person puts a top, pants, and two books in the backpack in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_39",
        "captions": {
            "2": "The person puts a laptop, book, two t-shirts, and pens in the backpack.",
            "3": "The person puts two t-shirts, pens, a book, and a laptop in the backpack.",
            "1": "The person puts a laptop, book, pens, and two t-shirts in the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_40",
        "captions": {
            "2": "The person puts a book, hoodie, laptop, and pen in the backpack.",
            "3": "The person puts a pen, laptop, hoodie, and book in the backpack.",
            "1": "The person puts a hoodie, book, laptop, and pen in the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_41",
        "captions": {
            "2": "Person packs headphones, wallet, cream-tube, two books, and then pouch into backpack.",
            "3": "Person packs cream-tube, two books, headphones, wallet, and then pouch into backpack.",
            "1": "Person packs wallet, headphones, cream-tube, two books, and then pouch into backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_42",
        "captions": {
            "2": "A person puts two books, a pouch, and then a laptop in a backpack.",
            "3": "A person puts a laptop, two books, and then a pouch in a backpack.",
            "1": "A person puts a pouch, two books, and then a laptop in a backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_43",
        "captions": {
            "2": "Person places a jar, a bottle, remote-control, mobile-charger, fork and spoon, and then a pen into a backpack.",
            "3": "Person places a mobile-charger, a jar, a pen, remote-control, fork and spoon, and then a bottle into a backpack.",
            "1": "Person places a jar, remote-control, bottle, mobile-charger, fork and spoon, and then a pen into a backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_44",
        "captions": {
            "2": "The person puts two books, a cell-phone, a pen, and then glasses in the backpack.",
            "3": "The person puts glasses, a pen, a cell-phone and then two books in the backpack.",
            "1": "The person puts a cell-phone, two books, a pen, and then glasses in the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_45",
        "captions": {
            "2": "Person packing a backpack with one book, then a white box, and finally another book.",
            "3": "Person packing a backpack with a white box followed by two books.",
            "1": "Person packing a backpack with two books followed by a white box."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_46",
        "captions": {
            "2": "The person puts books, pens, and clothes into the backpack in that order.",
            "3": "The person puts clothes, pens, and books into the backpack in that order.",
            "1": "The person puts pens, books, and clothes into the backpack in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_47",
        "captions": {
            "2": "The person puts a diary, followed by a bottle, and then a smartwatch in the backpack.",
            "3": "The person puts a smartwatch, followed by a diary, and then a bottle in the backpack.",
            "1": "The person puts a bottle, followed by a diary, and then a smartwatch in the backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_48",
        "captions": {
            "2": "Person packs a backpack with a tshirt, banana, and pencil in that order.",
            "3": "Person packs a backpack with a pencil, tshirt, and banana in that order.",
            "1": "Person packs a backpack with a banana, tshirt, and pencil in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_49",
        "captions": {
            "2": "A person puts two books, two pens, a tablet, and clothing in a backpack in that order.",
            "3": "A person puts two pens, a tablet, two books, and clothing in a backpack in that order.",
            "1": "A person puts two books, a tablet, two pens, and clothing in a backpack in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_50",
        "captions": {
            "2": "A person puts one piece of clothing into a backpack, followed by two books, then adds the second piece of clothing.",
            "3": "A person puts two books into a backpack, followed by two pieces of clothing.",
            "1": "A person puts two pieces of clothing followed by two books into a backpack."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_51",
        "captions": {
            "2": "Person packs a backpack by putting books, a laptop, pens, clothing, and a sweater in that order.",
            "3": "Person packs a backpack by putting a sweater, pens, books, a laptop, and clothing in that order.",
            "1": "Person packs a backpack by putting a laptop, books, pens, clothing, and a sweater in that order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_52",
        "captions": {
            "2": "The letters on the table at the end are e, r, h.",
            "3": "The letters on the table at the end are h, e, r.",
            "1": "The letters on the table at the end are r, e, h."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_53",
        "captions": {
            "2": "The letters on the table at the end are w, e, f, i.",
            "3": "The letters on the table at the end are e, w, i, f.",
            "1": "The letters on the table at the end are w, f, e, i."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_54",
        "captions": {
            "2": "The letters on the table at the end are s, o, r, e, t.",
            "3": "The letters on the table at the end are t, o, e, r, s.",
            "1": "The letters on the table at the end are r, s, o, e, t."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_55",
        "captions": {
            "2": "The letters on the table at the end are t, r, and a.",
            "3": "The letters on the table at the end are a, r, and t.",
            "1": "The letters on the table at the end are r, t, and a."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_56",
        "captions": {
            "2": "The letters on the table at the end are ordered: g, b, a.",
            "3": "The letters on the table at the end are ordered: b, a, g.",
            "1": "The letters on the table at the end are ordered: g, a, b."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_57",
        "captions": {
            "2": "The letters on the table at the end are in the order q, o, x, k.",
            "3": "The letters on the table at the end are in the order k, o, q, x.",
            "1": "The letters on the table at the end are in the order q, x, o, k."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_58",
        "captions": {
            "2": "Letters on the table at the end are arranged as x, e, s.",
            "3": "Letters on the table at the end are arranged as s, x, e.",
            "1": "Letters on the table at the end are arranged as e, x, s."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_59",
        "captions": {
            "2": "The letters on the table at the end are arranged as a, e, l, m.",
            "3": "The letters on the table at the end are arranged as e, a, m, l.",
            "1": "The letters on the table at the end are arranged as a, l, e, m."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_60",
        "captions": {
            "2": "The video shows the letters n, x, m, and h arranged on a table at the end.",
            "3": "The video shows the letters h, x, m, and n arranged on a table at the end.",
            "1": "The video shows the letters n, m, x, and h arranged on a table at the end."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_61",
        "captions": {
            "2": "Letters on the table are arranged in the order: a, d, c, b, f, e.",
            "3": "Letters on the table are arranged in the order: f, e, d, c, b, a.",
            "1": "Letters on the table are arranged in the order: d, a, c, b, e, f."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_62",
        "captions": {
            "2": "Letters on the table in the order: n, b, a.",
            "3": "Letters on the table in the order: a, n, d.",
            "1": "Letters on the table in the order: b, n, a."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_63",
        "captions": {
            "2": "The final order of the letters on the table is e, t, h.",
            "3": "The final order of the letters on the table is h, e, t.",
            "1": "The final order of the letters on the table is t, e, h."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_64",
        "captions": {
            "2": "The letters on the table at the end are arranged as a, c, b, g, d, o.",
            "3": "The letters on the table at the end are arranged as g, o, a, b, c, d.",
            "1": "The letters on the table at the end are arranged as a, b, c, d, g, o."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_65",
        "captions": {
            "2": "The letters on the table at the end are a, e, r.",
            "3": "The letters on the table at the end are r, a, e.",
            "1": "The letters on the table at the end are e, a, r."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_66",
        "captions": {
            "2": "Letters t, s, m, r, and o arranged on the table at the end.",
            "3": "Letters s, r, m, t, and o arranged on the table at the end.",
            "1": "Letters t, m, r, s, and o arranged on the table at the end."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_67",
        "captions": {
            "2": "The letters on the table at the end are g, i, h, j, k, l.",
            "3": "The letters on the table at the end are l, k, j, h, i, g.",
            "1": "The letters on the table at the end are g, h, i, j, k, l."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_68",
        "captions": {
            "2": "Letters on the table at the end are a, f, t.",
            "3": "Letters on the table at the end are t, a, f.",
            "1": "Letters on the table at the end are f, a, t."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_69",
        "captions": {
            "2": "The letters p, a, l, and y are arranged on the table at the end.",
            "3": "The letters y, a, p, and l are arranged on the table at the end.",
            "1": "The letters p, l, a, and y are arranged on the table at the end."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_70",
        "captions": {
            "2": "The person shows the letters in the order: a, c, b, e, d.",
            "3": "The person shows the letters in the order: e, d, c, b, a.",
            "1": "The person shows the letters in the order: a, b, c, d, e."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_71",
        "captions": {
            "2": "Person showing letters i, h, and j in order.",
            "3": "Person showing letters j, i, and h in order.",
            "1": "Person showing letters h, i, and j in order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_72",
        "captions": {
            "2": "Person shows the letters 'a', 'c', 'b', and 'd' in order.",
            "3": "Person shows the letters 'd', 'b', 'a' and 'c' in order.",
            "1": "Person shows the letters 'a', 'b', 'c', and 'd' in order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_73",
        "captions": {
            "2": "The person showed the letters a, b, o, h in order.",
            "3": "The person showed the letters o, h, a, b in order.",
            "1": "The person showed the letters a, o, b, h in order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_74",
        "captions": {
            "2": "The person showed the letters: x, a, h, k, n, m.",
            "3": "The person showed the letters: k, m, n, x, h, a.",
            "1": "The person showed the letters: x, h, a, k, m, n."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_75",
        "captions": {
            "2": "The person showed the letters s, r, and t in sequence.",
            "3": "The person showed the letters t, s, and r in sequence.",
            "1": "The person showed the letters r, s, and t in sequence."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_76",
        "captions": {
            "2": "Person sequentially shows letters t, d, m, and l.",
            "3": "Person sequentially shows letters l, m, t and d.",
            "1": "Person sequentially shows letters t, m, d, and l."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_77",
        "captions": {
            "2": "A person shows the letters a, c, b, j, i, h in order.",
            "3": "A person shows the letters h, b, a, c, i, j in order.",
            "1": "A person shows the letters a, b, c, h, i, j in order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_78",
        "captions": {
            "2": "The person showed the letters a, e, and x in order.",
            "3": "The person showed the letters e, x, and a in order.",
            "1": "The person showed the letters a, x, and e in order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_79",
        "captions": {
            "2": "Person displaying the letters b, o, and d in sequence.",
            "3": "Person displaying the letters d, b, and o in sequence.",
            "1": "Person displaying the letters o, b, and d in sequence."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_80",
        "captions": {
            "2": "Person showing letters in order: h, g, i.",
            "3": "Person showing letters in order: i, h, g.",
            "1": "Person showing letters in order: g, h, i."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_81",
        "captions": {
            "2": "The person showed the letters e, l, and f in order.",
            "3": "The person showed the letters l, f, and e in order.",
            "1": "The person showed the letters e, f, and l in order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_82",
        "captions": {
            "2": "The letters appear in the order: r, e, d.",
            "3": "The letters appear in the order: r, d, e.",
            "1": "The letters appear in the order: e, r, d."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_83",
        "captions": {
            "2": "The video ends with the letters in the order: b, x, o.",
            "3": "The video ends with the letters in the order: o, x, b.",
            "1": "The video ends with the letters in the order: x, b, o."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_84",
        "captions": {
            "2": "Person showing the letters a, c, t in order.",
            "3": "Person showing the letters t, a, c in order.",
            "1": "Person showing the letters c, a, t in order."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_85",
        "captions": {
            "2": "The letters on the table at the end are g, r, e, c, a.",
            "3": "The letters on the table at the end are e, c, a, g, r.",
            "1": "The letters on the table at the end are g, e, r, a, c."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_86",
        "captions": {
            "2": "The letters on the table at the end are n, r, e, l, a.",
            "3": "The letters on the table at the end are l, a, e, n, r.",
            "1": "The letters on the table at the end are n, e, l, r, a."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_87",
        "captions": {
            "2": "The letters on the table at the end are c, o, l, a, y, p, s.",
            "3": "The letters on the table at the end are s, y, c, p, o, l, a.",
            "1": "The letters on the table at the end are c, l, a, o, y, p, s."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_88",
        "captions": {
            "2": "Letters n, m, e, o, and y are arranged on the table at the end.",
            "3": "Letters y, e, o, m, and n are arranged on the table at the end.",
            "1": "Letters m, o, n, e, and y are arranged on the table at the end."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_89",
        "captions": {
            "2": "The letters on the table at the end are arranged as d, o, g.",
            "3": "The letters on the table at the end are arranged as g, d, o.",
            "1": "The letters on the table at the end are arranged as o, d, g."
        },
        "dataset": "perception_test",
        "aspect": "order"
    },
    {
        "video": "order_90",
        "captions": {
            "2": "Person sat down on the sofa, switched on the lamp and then took off their shoes.",
            "3": "Person took off their shoe, then puts it back on before sitting on the sofa.",
            "1": "Person switched on the lamp, sat down on the sofa and then took off their shoes."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_91",
        "captions": {
            "2": "A person sits on the bed, jumps to the door and puts down the broom.",
            "3": "A person jumps to the door puts down the broom and sits on the bed.",
            "1": "A person sits on the bed, puts down the broom and jumps to the door."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_92",
        "captions": {
            "2": "The person takes the clothes out, rinses them and puts down the bag.",
            "3": "The person rinses the clothes, takes them out and puts down the bag.",
            "1": "The person puts down the bag, takes the clothes out and rinses them."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_93",
        "captions": {
            "2": "Person picks up a banana, holds the blanket and moves a jar.",
            "3": "Person moves a jar, holds the blanket and picks up a banana.",
            "1": "Person holds the blanket, picks up a banana and moves a jar."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_94",
        "captions": {
            "2": "The person opened the laptop, sneezes and then eats some food.",
            "3": "The person sneezes, eats some food and then opened the laptop.",
            "1": "The person opened the laptop, eats some food and then sneezes."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_95",
        "captions": {
            "2": "The person holds the dish, grooms his face and then puts down the food.",
            "3": "The person grooms his face, puts down the food and then holds the dish.",
            "1": "The person holds the dish, puts down the food and then grooms his face."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_96",
        "captions": {
            "2": "The person picks up the clothes from the bed.Another person enters the room and throws shoes onto the ground. The first person tidies the clothes up.",
            "3": "A person enters the room and throws shoes onto the ground. Another person picks up the clothes from the bed and tidies them up.",
            "1": "The person picks up the clothes from the bed and tidies them up. Another person enters the room and throws shoes onto the ground."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_97",
        "captions": {
            "2": "A person tidies up the table, and then throws away food waste.",
            "3": "A person tidies up the table, pushes in the chairs and then throws away food waste.",
            "1": "A person throws away food waste, and then tidies up the table."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_98",
        "captions": {
            "2": "The person closes some books, picks up a towel and adjusts the table.",
            "3": "The person picks up a towel, adjusts the table and then closes some books.",
            "1": "The person closes some books, adjusts the table and then picks up a towel."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_99",
        "captions": {
            "2": "The person takes off a jacket, picks up a towel and then puts the jacket in the basket.",
            "3": "The person picks up the towel, takes off a jacket and then puts it in a basket.",
            "1": "The person takes off a jacket, puts it in a basket and then picks up a towel."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_100",
        "captions": {
            "2": "The person folds the clothes, takes a dish and then puts the clothes aside.",
            "3": "The person takes a dish, folds the clothes and then puts them aside.",
            "1": "The person folds the clothes, puts them aside and then takes a dish."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_101",
        "captions": {
            "2": "A person lies down on the floor, holds a pillow before getting up to exit the room.",
            "3": "A person gets up to exit the room, lies down on the floor before holding a pillow.",
            "1": "A person holds a pillow, lies down on the floor before getting up to exit the room."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_102",
        "captions": {
            "2": "The person adjusts the door latch, opens the door and then exits the room with a towel.",
            "3": "The person exits the room with a towel, adjusts the door latch and then opens the door.",
            "1": "The person opens the door, adjusts the door latch and then exits the room with a towel."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_103",
        "captions": {
            "2": "The person opens the bag, leaves the room and then takes out several books.",
            "3": "The person leaves the room, opens the bag and then takes out several books.",
            "1": "The person opens the bag, takes out several books and then leaves the room."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_104",
        "captions": {
            "2": "Person gets out of bed, eats food before tidying up some clothes.",
            "3": "Person eats good, gets out of bed and then tidies up some clothes.",
            "1": "Person gets out of bed, tidies up some clothes before eating food."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_105",
        "captions": {
            "2": "Person puts down a bag, lies down on the couch and then enters the kitchen.",
            "3": "Person lies down on the couch, enters the kitchen and then puts down a bag.",
            "1": "Person puts down a bag, enters the kitchen and then lies down on the couch."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_106",
        "captions": {
            "2": "The person picks up a blanket, eats food and then exits the room.",
            "3": "The person exits the room, picks up a blanket and then eats food.",
            "1": "The person eats food, picks up a blanket and then exits the room."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_107",
        "captions": {
            "2": "The person kicks off shoes, sweeps the floor and then sits down to read a book.",
            "3": "The person sweeps the floor, sits down to read a book and then kicks off shoes.",
            "1": "The person kicks off shoes, sits down to read a book and then gets up to sweep the floor."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_108",
        "captions": {
            "2": "The person picks up a blanket, eats food and then puts the blanket on his lap.",
            "3": "The person picks up a blanket, puts it on his lap and then eats good.",
            "1": "The person eats food, picks up a blanket and then puts it on his lap."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_109",
        "captions": {
            "2": "The person walks into the kitchen, washes a cup and then puts down a box.",
            "3": "The person puts down the box, walks into the kitchen and then washes a cup.",
            "1": "The person walks into the kitchen, puts down a box and washes a cup."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_110",
        "captions": {
            "2": "Person picks up a bag, takes out a laptop and then sits on the floor.",
            "3": "Person sits on the floor, picks up a bag and takes out a laptop.",
            "1": "Person picks up a bag, sits on the floor and then takes out a laptop."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_111",
        "captions": {
            "2": "A person enters the room and another person watches the television. The first person then picks up the book.",
            "3": "A person enters the room and picks up a book. Another person watches the television.",
            "1": "A person watches the television. Another person enters the room, and then picks up a book."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_112",
        "captions": {
            "2": "The person sits down, picks up the pillow and then dusts off the picture.",
            "3": "The person dusts off the picture, sits down and then picks up the pillow.",
            "1": "The person picks up a pillow, sits down and then dusts off the picture."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_113",
        "captions": {
            "2": "The person enters the room, picks up the blanket and puts down the laptop.",
            "3": "The person picks up the blanket, puts down the laptop and enters the room.",
            "1": "The person enters the room, puts down the laptop and picks up the blanket."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_114",
        "captions": {
            "2": "The person picks up a notebook, throws the shoes and exits the room.",
            "3": "The person exits the room, throws the shoes and then picks up a notebook.",
            "1": "The person throws the shoes, picks up a notebook and exits the room."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_115",
        "captions": {
            "2": "Person puts the laptop into the bag, opens the bag and stands up.",
            "3": "Person stands up, opens a bag and puts the laptop into the bag.",
            "1": "Person opens a bag, puts the laptop into the bag and stands up."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_116",
        "captions": {
            "2": "The person runs into the room, closes the door and then lies on the bed.",
            "3": "The person closes the door, runs into the room and then lies on the bed.",
            "1": "The person runs into the room, lies on the bed and then closes the door."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_117",
        "captions": {
            "2": "The person enters the room, puts down the book and then closes the door.",
            "3": "The person puts down the book, enters the room and then closes the door.",
            "1": "The person enters the room, closes the door and then puts down the book."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_118",
        "captions": {
            "2": "A person lifts up a book, then lifts up a picture before putting the picture in the book.",
            "3": "A person puts a picture in a book, lifts up the book before lifting up the picture.",
            "1": "A person lifts up a picture, then lifts up a book before putting the picture in the book."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_119",
        "captions": {
            "2": "A person sits at the table and then leaves the table. Another person then puts a box on the table.",
            "3": "A person leaves the table. Another person then puts a box on the table, before the first person sits back at the table.",
            "1": "A person sits at the table. Another person then puts a box on the table, before the first person leaves the table."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_120",
        "captions": {
            "2": "Person takes off a jacket, vacuums the floor and puts clothes into a washing machine.",
            "3": "Person puts clothes into a washing machine, takes off a jacket and then vacuums the floor.",
            "1": "Person vacuums the floor, takes off a jacket and puts clothes into a washing machine."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_121",
        "captions": {
            "2": "The person picked up a towel, opened the closet door and then threw the towel.",
            "3": "The person picked up a towel, threw it and then opened the closet door.",
            "1": "The person opened the closet door, picked up a towel and then threw it."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_122",
        "captions": {
            "2": "The person puts a towel in a box, then puts clothes in the box, and finally puts the box on the floor.",
            "3": "The person puts a box on the floor, puts a towel in the box and finally puts clothes in the box.",
            "1": "The person puts clothes in a box, then puts a towel in the box, and finally puts the box on the floor."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_123",
        "captions": {
            "2": "The person gets up from the bed, exits the room and then takes the towel.",
            "3": "The person takes the towel, gets up from the bed and then exits the room.",
            "1": "The person gets up from the bed, takes the towel and then exits the room."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_124",
        "captions": {
            "2": "Person drinks from a glass, and then puts down the box before putting down the glass.",
            "3": "Person puts down the glass, and then puts down the box before drinking from the glass.",
            "1": "Person puts down the box, and then drinks from a glass before putting down the glass."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_125",
        "captions": {
            "2": "The person takes a jar from the cupboard, puts the pan down and puts the jar down.",
            "3": "The person takes a jar from the cupboard, puts the jar down and puts the pan down.",
            "1": "The person puts the pan down, takes a jar from the cupboard and puts the jar down."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_126",
        "captions": {
            "2": "The person puts down the bag, looks out the window and sits on the bed.",
            "3": "The person sits on the bed, puts down the bag and looks out the window.",
            "1": "The person looks out the window, puts down the bag and sits on the bed."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_127",
        "captions": {
            "2": "Person closes the laptop, sweeps the floor and puts down the broom.",
            "3": "Person exits the room, closes the laptop and sweeps the floor.",
            "1": "Person sweeps the floor, closes a laptop and exits the room."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_128",
        "captions": {
            "2": "Person puts down a laptop, drinks from a glass and then sits down before opening the laptop.",
            "3": "Person opens the laptop, puts down the laptop and then drinks from a glass before sitting down.",
            "1": "Person puts down a laptop, sits down and then drinks from a glass before opening the laptop."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_129",
        "captions": {
            "2": "Person reads a book, drinks from a bottle and then opens a file.",
            "3": "Person opens a file, reads a book and then drinks from a bottle.",
            "1": "Person drinks from a bottle, reads a book and then opens a file."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_130",
        "captions": {
            "2": "Person wipes the door with a cloth, sneezes and picks up a notebook.",
            "3": "Person picks up a notebook, sneezes and wipes the door with a cloth.",
            "1": "Person wipes the door with a cloth, picks up a notebook and sneezes."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_131",
        "captions": {
            "2": "The person places a box on the bed, puts objects in the box and picks up the blanket.",
            "3": "The person picks up the blanket, places a box on the bed and puts objects in the box.",
            "1": "The person puts objects in a box, places the box on the bed and picks up the blanket."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_132",
        "captions": {
            "2": "The person sits down, takes off shoes and writes on a notebook.",
            "3": "The person takes off shoes, sits down and writes on a notebook.",
            "1": "The person sits down, writes on a notebook and takes off shoes."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_133",
        "captions": {
            "2": "Person paces around the room, picks up a phone call and then picks up a bag of food.",
            "3": "Person picks up a phone call, picks up a bag of food and then paces around the room.",
            "1": "Person paces around the room, picks up a bag of food and then picks up a phone call."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_134",
        "captions": {
            "2": "Person is answering a phone call before taking a blanket and sitting on the floor.",
            "3": "Person is taking a blanket before sitting on the floor and answering a phone call.",
            "1": "Person is answering a phone call before sitting on the floor and taking a blanket."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_135",
        "captions": {
            "2": "The person eats food, and stands up before opening a file to read.",
            "3": "The person stands up, opens a file to read before eating food.",
            "1": "The person eats food, opens a file to read it before standing up."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_136",
        "captions": {
            "2": "Person wipes their face with the towel, and wipes their body with the towel before opening the door.",
            "3": "Person wipes their body with the towel, opens the door and then wipes their face with the towel.",
            "1": "Person wipes their face with the towel, opens the door and then wipes their body with the towel."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_137",
        "captions": {
            "2": "The person sits down at the table, drinks from a cup and eats food from a bowl.",
            "3": "The person drinks from a cup, eats food from a bowl and sits down at the table.",
            "1": "The person sits down at the table, eats food from a bowl and drinks from a cup."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_138",
        "captions": {
            "2": "Person stands up, picks up a book and holds a blanket.",
            "3": "Person picks up a book, holds a blanket and stands up.",
            "1": "Person stands up, holds a blanket and picks up a book."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_139",
        "captions": {
            "2": "The person writes on a book, puts down the book and then eats food.",
            "3": "The person puts down the book, eats food and writes on a book.",
            "1": "The person writes on a book, eats food and then puts down the book."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_140",
        "captions": {
            "2": "Person washes clothes, eats food off a plate and sits on the floor.",
            "3": "Person eats food off a plate, washes clothes and sits on the floor.",
            "1": "Person washes clothes, sits on the floor and eats food off a plate."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_141",
        "captions": {
            "2": "Person sits down on the couch, throws the pillow and opens the laptop.",
            "3": "Person throws the pillow, sits down on the couch and opens the laptop.",
            "1": "Person sits down on the couch, opens the laptop and throws the pillow."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_142",
        "captions": {
            "2": "The person puts down the laptop, sits down on the sofa, opens the laptop and types on it.",
            "3": "The person opens the laptop, puts it down and types on it before sitting down on the sofa.",
            "1": "The person sits down on the sofa, puts down the laptop before opening it, followed by typing on it."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_143",
        "captions": {
            "2": "The person wipes the table, removes objects from the chair and sweeps the floor.",
            "3": "The person sweeps the floor, removes objects from the chair and wipes the table.",
            "1": "The person removes objects from the chair, wipes the table and sweeps the floor."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_144",
        "captions": {
            "2": "The person holds a pillow, looks at a picture and gets up to exit the room.",
            "3": "The person gets up to exit the room, holds a pillow and looks at a picture.",
            "1": "The person looks at a picture, holds a pillow and gets up to exit the room."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_145",
        "captions": {
            "2": "The person picks up a towel, picks up the laptop and wraps the towel around their head.",
            "3": "The person picks up the laptop, picks up a towel and wraps it around their head.",
            "1": "The person picks up a towel, wraps it around their head and picks up the laptop."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_146",
        "captions": {
            "2": "Person dances, eats food from a plate and then types on a laptop.",
            "3": "Person eats food from a plate, types on a laptop and then dances.",
            "1": "Person dances, types on a laptop and then eats food from a plate."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_147",
        "captions": {
            "2": "The person cooks food on a stove, eats food and opens the refrigerator.",
            "3": "The person eats food, opens the refrigerator and cooks food on a stove.",
            "1": "The person cooks food on a stove, opens the refrigerator and eats food."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_148",
        "captions": {
            "2": "The person is lying on a couch. He then takes a notepad before taking a pillow.",
            "3": "The person takes a notepad before taking a pillow, and then lies on the couch.",
            "1": "The person is lying on a couch. He then takes a pillow before taking a notepad."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_149",
        "captions": {
            "2": "Person wipes the windows and closes the windows, before putting down the spray bottle and picking up a phone.",
            "3": "Person picks up a phone and closes the windows, before wiping the windows and putting down the spray bottle.",
            "1": "Person wipes the windows and puts down the spray bottle, before closing the windows and picking up a phone."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_150",
        "captions": {
            "2": "A person opens the cupboard, picks up a book and puts the book inside.",
            "3": "A person opens the cupboard, puts a book inside and picks up a book.",
            "1": "A person picks up a book, opens the cupboard and puts the book inside."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_151",
        "captions": {
            "2": "Person empties the glass of water, holds it and puts it in the refrigerator.",
            "3": "Person puts glass in the refrigerator, holds a glass of water and empties it.",
            "1": "Person is holding a glass of water, empties the glass and puts it in the refrigerator."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_152",
        "captions": {
            "2": "The person types on the laptop, and takes the food before closing the laptop.",
            "3": "The person takes the food, and types on the laptop before closing it.",
            "1": "The person types on the laptop, and closes it before taking the food."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_153",
        "captions": {
            "2": "Person picks up a towel, opens the door before throwing the towel.",
            "3": "Person opens the door, picks up a towel and throws it.",
            "1": "Person picks up a towel, throws it before opening the door."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_154",
        "captions": {
            "2": "The person wipes their face with a paper towel, opens the refrigerator and throws the paper towel away.",
            "3": "The person wipes their face with a paper towel, throws it away and opens the refrigerator.",
            "1": "The person opens the refrigerator, wipes their face with a paper towel and throws it away."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_155",
        "captions": {
            "2": "Person sits down on a chair, takes a plate and picks up a towel.",
            "3": "Person picks up a towel, sits down on a chair and takes a plate.",
            "1": "Person takes a plate, sits down on a chair and picks up a towel."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_156",
        "captions": {
            "2": "The person opens a box, puts it in the wardrobe before putting the towel inside the box and closing it.",
            "3": "The person puts the box in the wardrobe, and opens it before putting the towel inside and closing the box.",
            "1": "The person opens a box, puts the towel inside, closes the box and puts it in the wardrobe."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_157",
        "captions": {
            "2": "Person picks up a book, takes off a shirt and runs out of the room.",
            "3": "Person runs out of the room, picks up a book and takes off a shirt.",
            "1": "Person takes off a shirt, picks up a book and runs out of the room."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_158",
        "captions": {
            "2": "Person puts down a laptop, types on it and takes a book.",
            "3": "Person takes a book, puts a laptop down and types on it.",
            "1": "Person types on a laptop, puts it down and takes a book."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_159",
        "captions": {
            "2": "The person takes items from the cupboard, and then takes items from the refrigerator. After that, the person prepares food in a pot.",
            "3": "The person prepares food in a pot. After that, the person takes items from the cupboard and then takes items from the refrigerator.",
            "1": "The person takes items from the refrigerator, and then takes items from the cupboard. After that, the person prepares food in a pot."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_160",
        "captions": {
            "2": "Person sits down before getting up, putting the blanket on the chair and then eats from a dish.",
            "3": "Person puts the blanket on the chair, sits down and eats from a dish before getting up.",
            "1": "Person sits down and eats from a dish, before getting up and putting the blanket on the chair."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_161",
        "captions": {
            "2": "A person enters the room and closes the door. Another person watches television while the first person wipes the windows.",
            "3": "A person enters the room, wipes the windows and then closes the door. Another person watches television.",
            "1": "A person watches television. Another person enters the room, closes the door and wipes the windows."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_162",
        "captions": {
            "2": "The person takes off a shirt, looks at a mirror and throws a pillow onto the bed.",
            "3": "The person looks at a mirror, throws a pillow onto the bed and takes off a shirt.",
            "1": "The person takes off a shirt, throws a pillow onto the bed and looks at a mirror."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_163",
        "captions": {
            "2": "A person picks up a plate, keeps items in a cupboard and eats food from the plate.",
            "3": "A person picks up a plate and eats food from it, and keeps items in a cupboard.",
            "1": "A person keeps items in a cupboard, picks up a plate and eats food from it."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_164",
        "captions": {
            "2": "Person opens the closet, takes a book and leaves the room before opening it.",
            "3": "Person leaves the room, opens the closet and takes the book before opening it.",
            "1": "Person opens the closet, takes a book and opens it before leaving the room."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_165",
        "captions": {
            "2": "Person wipes the shelves, eats some snacks and then tidies the table before wiping the table.",
            "3": "Person wipes the table, tidies the table, and then eats some snacks before wiping the shelves.",
            "1": "Person eats some snacks, wipes the shelves, and then tidies the table before wiping the table."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_166",
        "captions": {
            "2": "The person picks up the pillow, and closes the door before putting the pillow on top of a shelf.",
            "3": "The person closes the door, and picks up a pillow before putting it on top of a shelf.",
            "1": "The person picks up the pillow, and puts the pillow on top of a shelf before closing the door."
        },
        "dataset": "mvbench",
        "aspect": "order"
    },
    {
        "video": "order_167",
        "captions": {
            "2": "Ingredients are tossed into the skillet, it starts smoking and then burns in fire.",
            "3": "The skillet burns in fire, ingredients are tossed into it and it starts smoking.",
            "1": "The skillet starts smoking, ingredients are tossed into it, and the skillet burns in fire."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_168",
        "captions": {
            "2": "Dolphins emerge from the water, swim towards the shore and get fed with fish.",
            "3": "Dolphins get fed with fish, swim towards the shore and emerge from the water.",
            "1": "Dolphins swim towards the shore, emerge from the water and get fed with fish."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_169",
        "captions": {
            "2": "The woman closes the door, puts clothes into the washing machine, and then operates the washing machine.",
            "3": "The woman operates the washing machine, puts clothes into the washing machine and then closes the door.",
            "1": "The woman puts clothes into the washing machine, closes the door, and then operates the washing machine."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_170",
        "captions": {
            "2": "A man takes off his jacket, does a twisting stretch and then jumps up to grab the bar with his hands.",
            "3": "A man jumps up to grab the bar with his hands, takes off his jacket and does a twisting stretch.",
            "1": "A man does a twisting stretch, takes off his jacket and then jumps up to grab the bar with his hands."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_171",
        "captions": {
            "2": "Players pass the ball to each other, shoot the basketball and then dribble it.",
            "3": "Players shoot the basketball, dribble the ball and then pass it to each other.",
            "1": "Players pass the ball to each other, dribble the ball and then shoot the basketball."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_172",
        "captions": {
            "2": "A person opens the curtains and then gets out of bed.",
            "3": "A person gets out of bed and then makes the bed before opening the curtains.",
            "1": "A person gets out of bed and then opens the curtains."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_173",
        "captions": {
            "2": "The ball goes into the goal post, the man leaps to his right and then stands up from the ground.",
            "3": "The man stands up from the ground and leaps to his right as the ball goes into the goal post.",
            "1": "The man leaps to his right as the ball goes into the goal post, and then he stands up from the ground."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_174",
        "captions": {
            "2": "The man's facial expressions change from serious to smiling, then laughing.",
            "3": "The man's facial expressions change from smiling to serious.",
            "1": "The man's facial expressions change from serious to smiling."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_175",
        "captions": {
            "2": "Person smiles at the glass, drinks a glass of water and then turns back.",
            "3": "Person turns back, smiles at the glass and then drinks a glass of water.",
            "1": "Person drinks a glass of water, smiles at the glass and then turns back."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_176",
        "captions": {
            "2": "A squirrel bites the nut, picks it up and then turns around.",
            "3": "A squirrel turns around, bites the nut and then picks it up.",
            "1": "A squirrel picks up a nut, bites it and then turns around."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_177",
        "captions": {
            "2": "The fighter jet first shoots two missiles, then shoots one missile.",
            "3": "The fighter jet first shoots two missiles, then shoots one missile before performing an aerial maneuver.",
            "1": "The fighter jet first shoots one missile, then shoots two missiles."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_178",
        "captions": {
            "2": "A black vegetable is placed into the trolley. The person then pushes the trolley forward and places a black vegetable into the trolley.",
            "3": "The person pushes the trolley forward, and puts a black vegetable into the trolley followed by a green vegetable.",
            "1": "A green vegetable is placed into the trolley. The person then pushes the trolley forward and places a black vegetable into the trolley."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_179",
        "captions": {
            "2": "The street was first filled with people, then with vehicles, and continues to be filled with vehicles.",
            "3": "The street was first filled with vehicles, then with people, and continues to be filled with people.",
            "1": "The street was first filled with people, then with vehicles, followed by people again."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_180",
        "captions": {
            "2": "A person enters a password, scrolls through the settings menu, and selects the security section.",
            "3": "A person enters a password, selects the security section and scrolls through the settings menu.",
            "1": "A person scrolls through the settings menu, selects the security section, and then enters a password."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_181",
        "captions": {
            "2": "The bear turns around, looks at a river and catches a fish jumping out of the water.",
            "3": "The bear turns around, catches a fish jumping out of the water and looks at the river.",
            "1": "The bear looks at a river, catches a fish jumping out of the water and then turns around."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_182",
        "captions": {
            "2": "Children dive into the water, swim to the surface and then make gestures to the camera.",
            "3": "Children emerge from the water, make gestures to the camera and dive back into the water.",
            "1": "Children dive into the water, make gestures to the camera, and then swim to the surface."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_183",
        "captions": {
            "2": "A beaker is shown first, followed by a line of test tubes.",
            "3": "A beaker is shown first, followed by a flask of liquid and then a line of test tubes.",
            "1": "A line of test tubes is shown first, followed by a beaker."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_184",
        "captions": {
            "2": "A girl reads a book, a body of water is shown and the camera pans out.",
            "3": "A body of water is shown, and the camera pans out before a girl is shown reading a book.",
            "1": "A girl reads a book, the camera pans out and a body of water is shown."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_185",
        "captions": {
            "2": "A family of people is drawn first, followed by the house being drawn. After that, a tree and the sun are added.",
            "3": "A tree and the sun are drawn first, followed by the house being drawn. After that, a family of people is added.",
            "1": "A house is first drawn, followed by a family of people. After that, a tree and the sun are added."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_186",
        "captions": {
            "2": "Several birds fly onto the back of a hippo. Some birds fly away and another hippo pokes its head out of the water.",
            "3": "Some birds fly away before a hippo pokes its head out of the water. Several birds then fly onto the back of another hippo.",
            "1": "Several birds fly onto the back of a hippo. Another hippo pokes its head out of the water before some birds fly away."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_187",
        "captions": {
            "2": "A cargo train is shown first, followed by a container ship and then a cargo truck.",
            "3": "A cargo truck is shown first, followed by a cargo train and then a container ship.",
            "1": "A container ship is shown first, followed by a cargo train and then a cargo truck."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_188",
        "captions": {
            "2": "A person enters the scene, the robot vacuum cleaner starts moving and the person attaches an accessory to the robot vacuum cleaner.",
            "3": "The robot vacuum cleaner starts moving, and then a person enters the scene and attaches an accessory to the robot vacuum cleaner.",
            "1": "A person enters the scene, attaches an accessory to the robot vacuum cleaner, and then it starts moving."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_189",
        "captions": {
            "2": "A woman paints a wall while walking and continues walking while turning back to look at the wall.",
            "3": "A woman turns back to look at the wall, and continues walking before painting the wall while walking.",
            "1": "A woman paints a wall while walking, turns back to look at the wall and continues walking."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_190",
        "captions": {
            "2": "The person writes something on the paper note, stands it up and then adjusts it.",
            "3": "The person stands the paper note up, writes something on it and then adjusts it.",
            "1": "The person adjusts the paper note, writes something on it and then stands it up."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_191",
        "captions": {
            "2": "A little girl is drawn first, then erased before a man is being drawn.",
            "3": "A man is drawn first, then erased before a little girl is being drawn.",
            "1": "A little girl is drawn first, followed by a man being drawn. Both drawings are then erased"
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_192",
        "captions": {
            "2": "The bowl rotates, then food is added into the bowl before food is removed from the bowl.",
            "3": "Food is removed from the bowl, then the bowl rotates, before food is added into the bowl.",
            "1": "Food is added into the bowl, then the bowl rotates, before food is removed from the bowl."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_193",
        "captions": {
            "2": "The man propels himself forward in a wheelchair, hands a document to colleagues and greeted someone.",
            "3": "The man hands the document to colleagues, greeted someone and propels himself forward in a wheelchair and stops briefly to a pick a document.",
            "1": "The man propels himself forward in a wheelchair, greeted someone and then hands a document to colleagues."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_194",
        "captions": {
            "2": "The camera pans upwards, the chef smiles at the camera and then shapes dough.",
            "3": "The chef smiles at the camera, the camera pans upwards and then he shapes dough.",
            "1": "The chef shapes dough, the camera pans upwards and he smiles at the camera."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_195",
        "captions": {
            "2": "The salesman passes a bag to the woman, packs bread and then says something to her.",
            "3": "The salesman says something to the woman, packs bread and then passes a bag to her.",
            "1": "The salesman packs bread, passes a bag to the woman and then says something to her."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_196",
        "captions": {
            "2": "A woman does push ups, smiles at the camera and then lies on the ground.",
            "3": "A woman smiles at the camera, lies on the ground and then does push ups.",
            "1": "A woman does push ups, lies on the ground and then smiles at the camera."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_197",
        "captions": {
            "2": "A man chats with a woman while walking, continues walking and then stops to show something on his tablet to her.",
            "3": "A man continues walking, stops to show something on his tablet to a woman and then chats with her while walking.",
            "1": "A man chats with a woman while walking, stops to show something on his tablet to her and then continues walking."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_198",
        "captions": {
            "2": "Two hands hold a clay pot, followed by three hands holding it.",
            "3": "Two hands hold a clay pot. Then, a single hand holds it briefly before three hands hold the pot.",
            "1": "Three hands hold a clay pot, followed by two hands holding it."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_199",
        "captions": {
            "2": "Man glides on a skateboard, ties his shoelaces.",
            "3": "Man glides on a skateboard, ties his shoelaces and picks up the skateboard.",
            "1": "Man ties his shoelaces and then glides on a skateboard."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_200",
        "captions": {
            "2": "A man takes off his glasses, looks at his phone and then looks at his glasses.",
            "3": "A man takes off his glasses, looks at it and then looks at his phone.",
            "1": "A man looks at his phone, takes off his glasses and looks at it."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_201",
        "captions": {
            "2": "The woman samples water in a bottle first, operates her tablet and then shakes the bottle.",
            "3": "The woman operates her tablet first, samples water in a bottle and then shakes the bottle.",
            "1": "The woman samples water in a bottle first, shakes the bottle and then operates her tablet."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_202",
        "captions": {
            "2": "A woman measures her pregnant belly with a measuring tape and then touches it.",
            "3": "A woman measures her pregnant belly with a measuring tape and then touches it before reading a pregnancy book.",
            "1": "A woman touches her pregnant belly and then measures it with a measuring tape."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_203",
        "captions": {
            "2": "Sending an air kiss followed by dancing.",
            "3": "Sending an air kiss and winking followed by dancing",
            "1": "Dancing followed by sending an air kiss."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "order_204",
        "captions": {
            "2": "A man shows off his car key inside the car, he pulls back inside the car before a girl appears from the backseat.",
            "3": "A man pulls back inside the car, showing off his car key before a girl appears from the backseat.",
            "1": "A man shows off his car key inside the car, a girl appears from the backseat before he pulls back inside the car."
        },
        "dataset": "tempcompass",
        "aspect": "order"
    },
    {
        "video": "object_1",
        "captions": {
            "2": "Object zig-zags down a slanted plane.",
            "3": "Object bounces down a slanted plane.",
            "1": "Object rolling or sliding down a slanted plane."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_2",
        "captions": {
            "2": "The object zig-zags down the slanted plane.",
            "3": "The object bounces down the slanted plane.",
            "1": "The object rolls or slides down the slanted plane."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_3",
        "captions": {
            "2": "Object spins arounds on a slanted plane.",
            "3": "Object rolls down a slanted plane.",
            "1": "Object remains stationary on a slanted plane."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_4",
        "captions": {
            "2": "Serving a salad with cucumber, tomato, and salad leaves.",
            "3": "Consuming a salad with cucumber, tomato, and salad leaves.",
            "1": "Preparing a salad with cucumber, tomato, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_5",
        "captions": {
            "2": "A person putting courgette, tomato, and salad leaves in a bowl.",
            "3": "A person putting carrot, tomato, and salad leaves in a bowl.",
            "1": "A person putting cucumber, tomato, and salad leaves in a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_6",
        "captions": {
            "2": "Preparing a bowl of cucumber, tomato, and carrots.",
            "3": "Preparing a bowl of cucumber, tomato, and blueberries.",
            "1": "Preparing a bowl of cucumber, tomato, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_7",
        "captions": {
            "2": "Preparing a dish with cucumber, bell pepper, and salad leaves.",
            "3": "Preparing a dish with courgette, tomato, and bell pepper.",
            "1": "Preparing a dish with cucumber, tomato, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_8",
        "captions": {
            "2": "Preparing a salad with boiled egg, onion, carrot, salad leaves, and tomato.",
            "3": "Preparing a salad with boiled egg, onion, carrot, salad leaves, and apple slices.",
            "1": "Preparing a salad with boiled egg, onion, cucumber, salad leaves, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_9",
        "captions": {
            "2": "Person adding tomato and cucumber to a bowl.",
            "3": "Person adding tomato and tofu to a bowl.",
            "1": "Person adding tomato and cabbage to a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_10",
        "captions": {
            "2": "Serving a salad with leaves, cabbage, cucumber, and tomato.",
            "3": "Disposing of a salad with leaves, cabbage, cucumber, and tomato.",
            "1": "Preparing a salad with leaves, cabbage, cucumber, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_11",
        "captions": {
            "2": "The person added tomato and asparagus to the bowl.",
            "3": "The person added tomato and flower petals to the bowl.",
            "1": "The person added tomato and salad leaves to the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_12",
        "captions": {
            "2": "Person preparing a dish with tomato and broccoli.",
            "3": "Person preparing a dish with tomato and peanuts.",
            "1": "Person preparing a dish with tomato and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_13",
        "captions": {
            "2": "Mixing onions, salad leaves, and tomato in a bowl.",
            "3": "Mixing orange slices, salad leaves, and tomato in a bowl.",
            "1": "Mixing cucumber, salad leaves, and tomato in a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_14",
        "captions": {
            "2": "A person prepares a bowl with cucumber, eggplant, and tomato.",
            "3": "A person prepares a bowl with zucchini, salad leaves, and carrots.",
            "1": "A person prepares a bowl with cucumber, salad leaves, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_15",
        "captions": {
            "2": "Person preparing a bowl with cucumber, peppers, and tomato.",
            "3": "Person preparing a bowl with carrots, peppers, and tomato.",
            "1": "Person preparing a bowl with cucumber, salad leaves, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_16",
        "captions": {
            "2": "Person serving a dish with tomato, gherkin, and salad leaves.",
            "3": "Person consuming a dish with tomato, gherkin, and salad leaves.",
            "1": "Person preparing a dish with tomato, gherkin, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_17",
        "captions": {
            "2": "The person washed cheese, tomato, salad leaves, and onion in the bowl.",
            "3": "The person poured cheese, tomato, salad leaves, and onion out of the bowl.",
            "1": "The person added cheese, tomato, salad leaves, and onion to the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_18",
        "captions": {
            "2": "Person prepares a dish with cheese, tomato, asparagus, and onion.",
            "3": "Person prepares a dish with cheese, tomato, boiled egg, and onion.",
            "1": "Person prepares a dish with cheese, tomato, salad leaves, and onion."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_19",
        "captions": {
            "2": "Making a salad with leaves, cheese, cucumber, and corn.",
            "3": "Making a salad with leaves, cheese, apple slices, and corn.",
            "1": "Making a salad with leaves, cheese, tomato, and corn."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_20",
        "captions": {
            "2": "The person grates salad leaves, cucumber, and tomato in the bowl.",
            "3": "The person removes salad leaves, cucumber, and tomato from the bowl.",
            "1": "The person puts salad leaves, cucumber, and tomato in the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_21",
        "captions": {
            "2": "Person preparing a salad with leaves, cucumber, and carrot.",
            "3": "Person preparing a salad with beans, cucumber, and apple.",
            "1": "Person preparing a salad with leaves, cucumber, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_22",
        "captions": {
            "2": "Person refrigerates a dish with salad leaves and radish.",
            "3": "Person consumes a dish with salad leaves and radish.",
            "1": "Person prepares a dish with salad leaves and radish."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_23",
        "captions": {
            "2": "The person prepared a mixture of herbs, tomato, broccoli, and cucumber.",
            "3": "The person prepared a mixture of herbs, tomato, carrot, and broccoli.",
            "1": "The person prepared a mixture of herbs, tomato, salad leaves, and cucumber."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_24",
        "captions": {
            "2": "Person refrigerating a salad with cucumber, salad leaves, red cabbage, and tomato.",
            "3": "Person eating a salad with cucumber, salad leaves, red cabbage, and tomato.",
            "1": "Person preparing a salad with cucumber, salad leaves, red cabbage, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_25",
        "captions": {
            "2": "The person rinses tomato, carrot, and salad leaves in the bowl.",
            "3": "The person scrapes tomato, carrot, and salad leaves from the bowl.",
            "1": "The person puts tomato, carrot, and salad leaves in the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_26",
        "captions": {
            "2": "Person adding tomato, cucumber, and salad leaves to a bowl.",
            "3": "Person adding tomato, onion, and salad leaves to a bowl.",
            "1": "Person adding tomato, carrot, and salad leaves to a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_27",
        "captions": {
            "2": "Person refrigerating a salad with tomato, cucumber, olives, and salad leaves.",
            "3": "Person eating a salad with tomato, cucumber, olives, and salad leaves.",
            "1": "Person preparing a salad with tomato, cucumber, olives, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_28",
        "captions": {
            "2": "A person tosses tomato, cabbage, and eggplant into a bowl or plate.",
            "3": "A person removes tomato, cabbage, and eggplant from a bowl or plate.",
            "1": "A person adds tomato, cabbage, and eggplant to a bowl or plate."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_29",
        "captions": {
            "2": "Making a salad with corn, onions, and tomato.",
            "3": "Making a salad with corn, grapes, and tomato.",
            "1": "Making a salad with corn, salad leaves, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_30",
        "captions": {
            "2": "The person rinsed carrot, tomato, cucumber, and salad leaves in the bowl.",
            "3": "The person removed carrot, tomato, cucumber, and salad leaves from the bowl.",
            "1": "The person added carrot, tomato, cucumber, and salad leaves to the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_31",
        "captions": {
            "2": "Serving corn, salad leaves, tomato, and cucumber in a bowl.",
            "3": "Consuming corn, salad leaves, tomato, and cucumber from a bowl.",
            "1": "Mixing corn, salad leaves, tomato, and cucumber in a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_32",
        "captions": {
            "2": "Person preparing a salad with tomato, cheese, and cucumbers.",
            "3": "Person preparing a salad with tomato, cheese, and banana slices.",
            "1": "Person preparing a salad with tomato, cheese, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_33",
        "captions": {
            "2": "Person serves a dish with tomato, cheese, and salad leaves.",
            "3": "Person disposes of a dish with tomato, cheese, and salad leaves.",
            "1": "Person prepares a dish with tomato, cheese, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_34",
        "captions": {
            "2": "The person grates salad leaves and tomato in the bowl.",
            "3": "The person pours salad leaves and tomato out of the bowl.",
            "1": "The person puts salad leaves and tomato in the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_35",
        "captions": {
            "2": "Person refrigerating a dish with cheese, salad leaves, and tomato.",
            "3": "Person consuming a dish with cheese, salad leaves, and tomato.",
            "1": "Person preparing a dish with cheese, salad leaves, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_36",
        "captions": {
            "2": "Person refrigerates a salad with onion, cucumber, and salad leaves.",
            "3": "Person eats a salad with onion, cucumber, and salad leaves.",
            "1": "Person prepares a salad with onion, cucumber, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_37",
        "captions": {
            "2": "Person serves a bowl with salad leaves, sun dried tomato, and cheese.",
            "3": "Person eats a bowl with salad leaves, sun dried tomato, and cheese.",
            "1": "Person prepares a bowl with salad leaves, sun dried tomato, and cheese."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_38",
        "captions": {
            "2": "Person prepares a bowl with corn, cucumber, and salad leaves.",
            "3": "Person prepares a bowl with corn, apple slices, and asparagus.",
            "1": "Person prepares a bowl with corn, tomato, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_39",
        "captions": {
            "2": "The person rinsed salad leaves, carrot, tomato, and cheese in the bowl.",
            "3": "The person poured salad leaves, carrot, tomato, and cheese out from the bowl.",
            "1": "The person added salad leaves, carrot, tomato, and cheese to the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_40",
        "captions": {
            "2": "Preparing a plate with tomato, peppers, and cucumber.",
            "3": "Preparing a plate with tomato, onions, and carrots.",
            "1": "Preparing a plate with tomato, salad leaves, and cucumber."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_41",
        "captions": {
            "2": "Person refrigerates a salad with tomato, cucumber, and salad leaves.",
            "3": "Person eats a salad with tomato, cucumber, and salad leaves.",
            "1": "Person prepares a salad with tomato, cucumber, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_42",
        "captions": {
            "2": "A person serves a salad with tomato, salad leaves, and cucumber.",
            "3": "A person eats a salad with tomato, salad leaves, and cucumber.",
            "1": "A person prepares a salad with tomato, salad leaves, and cucumber."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_43",
        "captions": {
            "2": "Person prepares a salad with salad leaves, cucumber, and carrot.",
            "3": "Person prepares a salad with salad leaves, apple, and cucumber.",
            "1": "Person prepares a salad with salad leaves, tomato, and carrot."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_44",
        "captions": {
            "2": "Person prepares a dish with herbs, salad leaves, zucchini, and tomato.",
            "3": "Person prepares a dish with herbs, salad leaves, apple slices, and tomato.",
            "1": "Person prepares a dish with herbs, salad leaves, cucumber, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_45",
        "captions": {
            "2": "A person prepares a dish with lettuce, tomato, and cheese.",
            "3": "A person prepares a dish with lettuce, cucumber, and cheese.",
            "1": "A person prepares a dish with salad leaves, tomato, and cheese."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_46",
        "captions": {
            "2": "Preparing a bowl with zucchini, salad leaves, and tomato.",
            "3": "Preparing a bowl with zucchini, salad leaves, and carrots.",
            "1": "Preparing a bowl with eggplant, salad leaves, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_47",
        "captions": {
            "2": "Person serves a bowl with onion, carrot, salad leaves, and apple.",
            "3": "Person consumes a bowl with onion, carrot, salad leaves, and apple.",
            "1": "Person prepares a bowl with onion, carrot, salad leaves, and apple."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_48",
        "captions": {
            "2": "Person refrigerating a bowl with beetroot, tomato, salad leaves, and cucumber.",
            "3": "Person disposing of a bowl with beetroot, tomato, salad leaves, and cucumber.",
            "1": "Person preparing a bowl with beetroot, tomato, salad leaves, and cucumber."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_49",
        "captions": {
            "2": "Courgette, herbs, tomato, and salad leaves being grated in a bowl.",
            "3": "Courgette, herbs, tomato, and salad leaves being removed from a bowl.",
            "1": "Courgette, herbs, tomato, and salad leaves being added to a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_50",
        "captions": {
            "2": "A person serves tomato, corn, cucumber, salad leaves, and cheese in a bowl.",
            "3": "A person eats tomato, corn, cucumber, salad leaves, and cheese from a bowl.",
            "1": "A person mixes tomato, corn, cucumber, salad leaves, and cheese in a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_51",
        "captions": {
            "2": "A person serving a salad with cucumber, salad leaves, and onion in a bowl.",
            "3": "A person disposing of a salad with cucumber, salad leaves, and onion from a bowl.",
            "1": "A person assembling a salad with cucumber, salad leaves, and onion in a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_52",
        "captions": {
            "2": "Mixing cucumber, cauliflower, and onion in a bowl.",
            "3": "Mixing cucumber, apple slices, and onion in a bowl.",
            "1": "Mixing cucumber, salad leaves, and onion in a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_53",
        "captions": {
            "2": "The person grates tomato, pepper, and cheese in the bowl.",
            "3": "The person scrapes tomato, pepper, and cheese from the bowl.",
            "1": "The person adds tomato, pepper, and cheese to the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_54",
        "captions": {
            "1": "Person added salad leaves, onion, and tomato to the bowl.",
            "2": "Person washed salad leaves, onion, and tomato in the bowl.",
            "3": "Person poured salad leaves, onion, and tomato out of the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_55",
        "captions": {
            "2": "Person washes tomato in the bowl.",
            "3": "Person blends tomato in the bowl.",
            "1": "Person adds tomato to the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_56",
        "captions": {
            "2": "Person adds a carrot to a bowl.",
            "3": "Person adds a chocolate to a bowl.",
            "1": "Person adds a tomato to a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_57",
        "captions": {
            "2": "Adding salad leaves, beetroot, and carrots to a bowl.",
            "3": "Adding salad leaves, beetroot, and an egg to a bowl.",
            "1": "Adding salad leaves, beetroot, and tomato to a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_58",
        "captions": {
            "2": "Person preparing a salad with leaves and cucumber.",
            "3": "Person preparing a salad with leaves and olives.",
            "1": "Person preparing a salad with leaves and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_59",
        "captions": {
            "2": "Serving salad leaves, courgette, herbs, and tomato to the bowl.",
            "3": "Consuming salad leaves, courgette, herbs, and tomato from the bowl.",
            "1": "Adding salad leaves, courgette, herbs, and tomato to the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_60",
        "captions": {
            "2": "Person preparing a dish with tomato, salad leaves, and peas.",
            "3": "Person preparing a dish with tomato, salad leaves, and apples.",
            "1": "Person preparing a dish with tomato, salad leaves, and corn."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_61",
        "captions": {
            "2": "The person puts tomato and onions in the bowl.",
            "3": "The person puts tomato and grapes in the bowl.",
            "1": "The person puts tomato and pepper in the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_62",
        "captions": {
            "2": "Person refrigerating a salad with leaves, tomato, and onion.",
            "3": "Person eating a salad with leaves, tomato, and onion.",
            "1": "Person preparing a salad with leaves, tomato, and onion."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_63",
        "captions": {
            "2": "Person puts an apple in the bowl.",
            "3": "Person puts a broccoli in the bowl.",
            "1": "Person puts a banana in the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_64",
        "captions": {
            "2": "A person tosses a banana in a bowl.",
            "3": "A person removes a banana from a bowl.",
            "1": "A person puts a banana in a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_65",
        "captions": {
            "2": "A person adds salad leaves, tomato, cheese, and cashews to a bowl.",
            "3": "A person adds salad leaves, tomato, cheese, and chocolate chips to a bowl.",
            "1": "A person adds salad leaves, tomato, cheese, and peanuts to a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_66",
        "captions": {
            "2": "A person added cheese, tomato, almonds, and salad leaves to a bowl.",
            "3": "A person added cheese, tomato, chocolate chips, and salad leaves to a bowl.",
            "1": "A person added cheese, tomato, peanuts, and salad leaves to a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_67",
        "captions": {
            "2": "Person flipping a diary on the tabletop.",
            "3": "Person putting a diary on the tabletop.",
            "1": "Person removing a diary from the tabletop."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_68",
        "captions": {
            "2": "Person adjusting a diary on the tabletop.",
            "3": "Person setting a diary on the tabletop.",
            "1": "Person removing a diary from the tabletop."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_69",
        "captions": {
            "2": "Person removing a parcel from the tabletop.",
            "3": "Person removing a towel from the tabletop.",
            "1": "Person removing a diary from the tabletop."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_70",
        "captions": {
            "2": "Person adjusting a diary on the tabletop.",
            "3": "Person dropping a diary on the tabletop.",
            "1": "Person removing a diary from the tabletop."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_71",
        "captions": {
            "2": "Person crushing salad leaves into a bowl.",
            "3": "Person eating salad leaves from a bowl.",
            "1": "Person adding salad leaves to a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_72",
        "captions": {
            "2": "Person tosses salad leaves into a bowl.",
            "3": "Person scrapes salad leaves from a bowl.",
            "1": "Person adds salad leaves to a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_73",
        "captions": {
            "2": "Person preparing a salad with cucumber, beetroot, onion, and tomato.",
            "3": "Person preparing a salad with cucumber, beetroot, peanuts, and tomato.",
            "1": "Person preparing a salad with cucumber, beetroot, salad leaves, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_74",
        "captions": {
            "2": "The person added tomato, onion, and cucumber to the bowl.",
            "3": "The person added tomato, onion, and olives to the bowl.",
            "1": "The person added tomato, onion, and salad leaves to the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_75",
        "captions": {
            "2": "Person grates cheese, tomato, and salad leaves in the bowl.",
            "3": "Person scrapes cheese, tomato, and salad leaves from the bowl.",
            "1": "Person adds cheese, tomato, and salad leaves to the bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_76",
        "captions": {
            "2": "Preparing a fresh salad with carrot, salad leaves, and cucumber.",
            "3": "Preparing a fresh salad with carrot, salad leaves, and mushrooms.",
            "1": "Preparing a fresh salad with carrot, salad leaves, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_77",
        "captions": {
            "2": "Preparing a bowl with cabbage, salad leaves, tomato, and cheese.",
            "3": "Preparing a bowl with bell pepper, corn, tomato, and cheese.",
            "1": "Preparing a bowl with cucumber, salad leaves, tomato, and cheese."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_78",
        "captions": {
            "2": "Refrigerating a salad with salad leaves, carrot, cheese, and onion.",
            "3": "Disposing of a salad with salad leaves, carrot, cheese, and onion.",
            "1": "Making a salad with salad leaves, carrot, cheese, and onion."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_79",
        "captions": {
            "2": "Refrigerating a salad with tomato, cheese, onion, salad leaves, and beetroot.",
            "3": "Eating a salad with tomato, cheese, onion, salad leaves, and beetroot.",
            "1": "Preparing a salad with tomato, cheese, onion, salad leaves, and beetroot."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_80",
        "captions": {
            "2": "Person refrigerating a dish with beetroot, tomato, and salad leaves.",
            "3": "Person eating a dish with beetroot, tomato, and salad leaves.",
            "1": "Person preparing a dish with beetroot, tomato, and salad leaves."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_81",
        "captions": {
            "2": "Person placing onion, carrot, and salad leaves in a bowl.",
            "3": "Person placing onion, apple, and carrots in a bowl.",
            "1": "Person placing onion, tomato, and salad leaves in a bowl."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_82",
        "captions": {
            "2": "A person refrigerates a dish with salad leaves, corn, and tomato.",
            "3": "A person eats a dish with salad leaves, corn, and tomato.",
            "1": "A person prepares a dish with salad leaves, corn, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_83",
        "captions": {
            "2": "Person refrigerates a bowl with cucumber, cheese, salad leaves, and tomato.",
            "3": "Person eats a bowl with cucumber, cheese, salad leaves, and tomato.",
            "1": "Person prepares a bowl with cucumber, cheese, salad leaves, and tomato."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_84",
        "captions": {
            "2": "A person throws a circle and a rectangle onto the table.",
            "3": "A person takes a circle and a rectangle from the table.",
            "1": "A person places a circle and a rectangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_85",
        "captions": {
            "2": "Person throws a circle and a rectangle onto the table.",
            "3": "Person removes a circle and a rectangle from the table.",
            "1": "Person places a circle and a rectangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_86",
        "captions": {
            "2": "A person drops a circle and a rectangle onto the table.",
            "3": "A person picks a circle and a rectangle from the table.",
            "1": "A person places a circle and a rectangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_87",
        "captions": {
            "2": "Person places a circle and a square on the table.",
            "3": "Person places a star and a square on the table.",
            "1": "Person places a circle and a rectangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_88",
        "captions": {
            "2": "A person placed triangles, circles, and hexagons on the table.",
            "3": "A person placed stars, circles, and hexagons on the table.",
            "1": "A person placed triangles, circles, and squares on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_89",
        "captions": {
            "2": "Geometric shapes of triangles, rectangles, and squares, were placed on the table.",
            "3": "Geometric shapes of hexagons, rectangles, and squares, were placed on the table.",
            "1": "Geometric shapes of triangles, circles, and squares, were placed on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_90",
        "captions": {
            "2": "Person places a triangle and a star on the table.",
            "3": "Person places a rectangle and a star on the table.",
            "1": "Person places a triangle and a square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_91",
        "captions": {
            "2": "Placing geometric shapes including a triangle and a rectangle on the table.",
            "3": "Placing geometric shapes including a pentagon and a rectangle on the table.",
            "1": "Placing geometric shapes including a triangle and a square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_92",
        "captions": {
            "2": "Person places triangle and circle shapes on the table.",
            "3": "Person places hexagon and a circle shapes on the table.",
            "1": "Person places triangle and square shapes on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_93",
        "captions": {
            "2": "Person tosses a triangle and a square onto the table.",
            "3": "Person takes a triangle and a square from the table.",
            "1": "Person places a triangle and a square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_94",
        "captions": {
            "2": "A person places a triangle and a rectangle on the table.",
            "3": "A person places a rectangle and a pentagon on the table.",
            "1": "A person places a triangle and a square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_95",
        "captions": {
            "2": "Person tosses a flag, cube, and heart-shaped objects onto the table.",
            "3": "Person takes a flag, cube, and heart-shaped objects from the table.",
            "1": "Person places a flag, cube, and heart-shaped objects on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_96",
        "captions": {
            "2": "Person tossing a flag, cube, and heart onto the table.",
            "3": "Person picking a flag, cube, and heart from the table.",
            "1": "Person placing a flag, cube, and heart on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_97",
        "captions": {
            "2": "Person placing a square and a circle on the table.",
            "3": "Person placing a octagon and an circle on the table.",
            "1": "Person placing a square and triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_98",
        "captions": {
            "2": "Person tosses a square and a triangle onto the table.",
            "3": "Person removes a square and a triangle from the table.",
            "1": "Person places a square and a triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_99",
        "captions": {
            "2": "Person places a square and a rectangle on the table.",
            "3": "Person places a square and a hexagon on the table.",
            "1": "Person places a square and a triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_100",
        "captions": {
            "2": "A person placing a square and a rectangle on a table.",
            "3": "A person placing a hexagon and a rectangle on a table.",
            "1": "A person placing a square and a triangle on a table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_101",
        "captions": {
            "2": "A person placed a triangle and a square on the table.",
            "3": "A person placed a triangle and a hexagon on the table.",
            "1": "A person placed a triangle and a rectangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_102",
        "captions": {
            "2": "A person places rectangles, circles, and triangles on a table.",
            "3": "A person places stars, circles, and triangles on a table.",
            "1": "A person places squares, circles, and triangles on a table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_103",
        "captions": {
            "2": "Person drops a circle, pentagon, and triangle onto the table.",
            "3": "Person takes a circle, pentagon, and triangle from the table.",
            "1": "Person places a circle, pentagon, and triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_104",
        "captions": {
            "2": "The person dropped a circle, pentagon, and triangle onto the table.",
            "3": "The person removed a circle, pentagon, and triangle from the table.",
            "1": "The person placed a circle, pentagon, and triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_105",
        "captions": {
            "2": "Person places two cylinders on the table.",
            "3": "Person places two cubes on the table.",
            "1": "Person places two circles on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_106",
        "captions": {
            "2": "Person places two squares on the table.",
            "3": "Person places two triangles on the table.",
            "1": "Person places two circles on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_107",
        "captions": {
            "2": "The person placed a square, triangle, and hexagon on the table.",
            "3": "The person placed two triangles, and hexagon on the table.",
            "1": "The person placed a square, triangle, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_108",
        "captions": {
            "2": "The person puts two circles and a square on the table.",
            "3": "The person puts two circles and a hexagon on the table.",
            "1": "The person puts two circles and a triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_109",
        "captions": {
            "2": "The person tossed two circles and a triangle onto the table.",
            "3": "The person removed two circles and a triangle from the table.",
            "1": "The person placed two circles and a triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_110",
        "captions": {
            "2": "Person tosses two squares and two hexagons onto the table.",
            "3": "Person picks two squares and two hexagons from the table.",
            "1": "Person places two squares and two hexagons on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_111",
        "captions": {
            "2": "A person throws a rectangle and a triangle onto the table.",
            "3": "A person picks a rectangle and a triangle from the table.",
            "1": "A person places a rectangle and a triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_112",
        "captions": {
            "2": "A person places a hexagon, circle, and triangle on the table.",
            "3": "A person places a pentagon and two triangles on the table.",
            "1": "A person places a hexagon, circle, and square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_113",
        "captions": {
            "2": "A person places a flag, rectangle, and circle on the table.",
            "3": "A person places a flag, rectangle, and triangle on the table.",
            "1": "A person places a flag, square, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_114",
        "captions": {
            "2": "Person sliding a hexagon, rectangle, triangle, and square onto the table.",
            "3": "Person picking a hexagon, rectangle, triangle, and square from the table.",
            "1": "Person placing a hexagon, rectangle, triangle, and square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_115",
        "captions": {
            "2": "Person drops a circle, hexagon, and triangle onto the table.",
            "3": "Person picks a circle, hexagon, and triangle from the table.",
            "1": "Person places a circle, hexagon, and triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_116",
        "captions": {
            "2": "Person places circle, pentagon, and triangle on the table.",
            "3": "Person places circle, octagon, and triangle on the table.",
            "1": "Person places circle, hexagon, and triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_117",
        "captions": {
            "2": "Person placing circle, rectangle, and triangle shapes on the table.",
            "3": "Person placing hexagon, rectangle, and triangle shapes on the table.",
            "1": "Person placing circle, square, and triangle shapes on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_118",
        "captions": {
            "2": "Person dropping a square and a circle onto the table.",
            "3": "Person removing a square and a circle from the table.",
            "1": "Person placing a square and a circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_119",
        "captions": {
            "2": "Person sliding circle, square, and triangle shapes onto the table.",
            "3": "Person picking circle, square, and triangle shapes from the table.",
            "1": "Person arranging circle, square, and triangle shapes on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_120",
        "captions": {
            "2": "Person places two circles, two squares, and two rectangles on the table.",
            "3": "Person places a circle, two squares, and two rectangles on the table.",
            "1": "Person places two circles, two squares, and two triangles on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_121",
        "captions": {
            "2": "Person drops a heart, flag, and circle-shaped objects onto the table.",
            "3": "Person picks a heart, flag, and circle-shaped objects from the table.",
            "1": "Person places a heart, flag, and circle-shaped objects on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_122",
        "captions": {
            "2": "The person placed two rectangles on the table.",
            "3": "The person placed two cylinders on the table.",
            "1": "The person placed two squares on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_123",
        "captions": {
            "2": "Person placing a triangle, star, and square on the table.",
            "3": "Person placing a star, hexagon, and square on the table.",
            "1": "Person placing a triangle, circle, and square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_124",
        "captions": {
            "2": "A person places two triangles and a square on the table.",
            "3": "A person places a triangle, hexagon, and star on the table.",
            "1": "A person places a triangle, circle, and square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_125",
        "captions": {
            "2": "Person tosses two squares and a triangle onto the table.",
            "3": "Person takes two squares and a triangle from the table.",
            "1": "Person places two squares and a triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_126",
        "captions": {
            "2": "The person drops two squares and a triangle onto the table.",
            "3": "The person takes two squares and a triangle from the table.",
            "1": "The person puts two squares and a triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_127",
        "captions": {
            "2": "A person dropping two circles, one rectangle, and another circle onto a table.",
            "3": "A person picking two circles, one rectangle, and another circle from a table.",
            "1": "A person placing two circles, one rectangle, and another circle on a table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_128",
        "captions": {
            "2": "Person throws three circles and a rectangle onto the table.",
            "3": "Person picks three circles and a rectangle from the table.",
            "1": "Person places three circles and a rectangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_129",
        "captions": {
            "2": "Person places a circle and a square on the table.",
            "3": "Person places a circle and a star on the table.",
            "1": "Person places a circle and a triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_130",
        "captions": {
            "2": "A person drops a circle and a triangle onto the table.",
            "3": "A person picks a circle and a triangle from the table.",
            "1": "A person places a circle and a triangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_131",
        "captions": {
            "2": "A person places a circle and a triangle on the table.",
            "3": "A person places a hexagon and a triangle on the table.",
            "1": "A person places a circle and a square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_132",
        "captions": {
            "2": "The person placed a circle, triangle, and a square on the table.",
            "3": "The person placed a circle, star, and a square on the table.",
            "1": "The person placed a circle, triangle, and another circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_133",
        "captions": {
            "2": "The person slid a heart, circle, square, and flag onto the table.",
            "3": "The person took a heart, circle, square, and flag from the table.",
            "1": "The person placed a heart, circle, square, and flag on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_134",
        "captions": {
            "2": "A person drops a butterfly, star, and heart onto the table.",
            "3": "A person picks a butterfly, star, and heart from the table.",
            "1": "A person puts a butterfly, star, and heart on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_135",
        "captions": {
            "2": "Person dropping a triangle, square, and circle onto the table.",
            "3": "Person picking a triangle, square, and circle from the table.",
            "1": "Person placing a triangle, square, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_136",
        "captions": {
            "2": "A person tosses a triangle, square, and circle onto the table.",
            "3": "A person takes a triangle, square, and circle from the table.",
            "1": "A person places a triangle, square, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_137",
        "captions": {
            "2": "A person slides a hexagon, triangle, and circle onto the table.",
            "3": "A person picks a hexagon, triangle, and circle from the table.",
            "1": "A person places a hexagon, triangle, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_138",
        "captions": {
            "2": "Person places an oval, square, and sector on the table.",
            "3": "Person places an oval, triangle, and sector on the table.",
            "1": "Person places an oval, rectangle, and sector on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_139",
        "captions": {
            "2": "Person throws hexagon, rectangle, square, and circle onto the table.",
            "3": "Person removes hexagon, rectangle, square, and circle from the table.",
            "1": "Person arranges hexagon, rectangle, square, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_140",
        "captions": {
            "2": "Person slides a hexagon, rectangle, square, and circle onto the table.",
            "3": "Person picks a hexagon, rectangle, square, and circle from the table.",
            "1": "Person places a hexagon, rectangle, square, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_141",
        "captions": {
            "2": "Person places square, octagon, and rectangle shapes on the table.",
            "3": "Person places triangle, circle, and octagon shapes on the table.",
            "1": "Person places square, circle, and rectangle shapes on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_142",
        "captions": {
            "2": "A person places a triangle and a square on the table.",
            "3": "A person places a triangle and a hexagon on the table.",
            "1": "A person places a triangle and a circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_143",
        "captions": {
            "2": "Person places a triangle and a square on the table.",
            "3": "Person places a triangle and a rectangle on the table.",
            "1": "Person places a triangle and a circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_144",
        "captions": {
            "2": "A person places a triangle and a square on the table.",
            "3": "A person places a star and a hexagon on the table.",
            "1": "A person places a triangle and a circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_145",
        "captions": {
            "2": "Dropping circles and a rectangle onto the table.",
            "3": "Taking circles and a rectangle from the table.",
            "1": "Placing circles and a rectangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_146",
        "captions": {
            "2": "The person placed two circles and a square on the table.",
            "3": "The person placed two circles and a triangle on the table.",
            "1": "The person placed two circles and a rectangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_147",
        "captions": {
            "2": "Person dropping rectangle, square, and triangle onto the table",
            "3": "Person removing rectangle, square, and triangle from the table",
            "1": "Person placing rectangle, square, and triangle on the table"
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_148",
        "captions": {
            "2": "A person placed a rectangle, square, pentagon, and another rectangle on the table.",
            "3": "A person placed a rectangle, square, octagon, and another rectangle on the table.",
            "1": "A person placed a rectangle, square, hexagon, and another rectangle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_149",
        "captions": {
            "2": "Person slides two triangles and a square onto the table.",
            "3": "Person takes two triangles and a square from the table.",
            "1": "Person places two triangles and a square on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_150",
        "captions": {
            "2": "Person places a circle and two squares on the table.",
            "3": "Person places a circle and two triangles on the table.",
            "1": "Person places a circle and two rectangles on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_151",
        "captions": {
            "2": "Person slides square, rectangle, and circle onto the table.",
            "3": "Person removes square, rectangle, and circle from the table.",
            "1": "Person places square, rectangle, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_152",
        "captions": {
            "2": "Person throws square, rectangle, and circle onto the table.",
            "3": "Person takes square, rectangle, and circle from the table.",
            "1": "Person places square, rectangle, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_153",
        "captions": {
            "2": "Person placing a circle, rectangle, and triangle on the table.",
            "3": "Person placing a circle, square, and star on the table.",
            "1": "Person placing a circle, rectangle, and hexagon on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_154",
        "captions": {
            "2": "Person placing a triangle, square, and circle on the table.",
            "3": "Person placing a star, hexagon, and circle on the table.",
            "1": "Person placing a triangle, rectangle, and circle on the table."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_155",
        "captions": {
            "2": "A person removes a metal box from the tabletop.",
            "3": "A person removes a ceramic vase from the tabletop.",
            "1": "A person removes a wooden object from the tabletop."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_156",
        "captions": {
            "2": "Person adjusts a wooden object on the tabletop.",
            "3": "Person puts a wooden object on the tabletop.",
            "1": "Person removes a wooden object from the tabletop."
        },
        "dataset": "perception_test",
        "aspect": "object",
        "subaspect" : "object_interaction"
    },
    {
        "video": "object_157",
        "captions": {
            "2": "Person taking a towel.",
            "3": "Person taking a curtain.",
            "1": "Person taking a blanket."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_158",
        "captions": {
            "2": "Person tidies up a cloth.",
            "3": "Person tidies up a curtain.",
            "1": "Person tidies up a blanket."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_159",
        "captions": {
            "2": "Person throwing a bolster.",
            "3": "Person throwing a book.",
            "1": "Person throwing a pillow."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_160",
        "captions": {
            "2": "The person opened a photo album.",
            "3": "The person opened a laptop.",
            "1": "The person opened a book."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_161",
        "captions": {
            "2": "A person lies on the bed.",
            "3": "A person lies on the grass.",
            "1": "A person lies on the floor."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_162",
        "captions": {
            "2": "A person puts down a rug.",
            "3": "A person puts down a curtain.",
            "1": "A person puts down a blanket."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_163",
        "captions": {
            "2": "A person is sitting on a stool.",
            "3": "A person is sitting on a chair.",
            "1": "A person is sitting on the floor."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_164",
        "captions": {
            "2": "A person closing a suitcase.",
            "3": "A person closing a drawer.",
            "1": "A person closing a box."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_165",
        "captions": {
            "2": "A person takes a tablet.",
            "3": "A person takes a bottle.",
            "1": "A person takes a book."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_166",
        "captions": {
            "2": "A person washes a bowl.",
            "3": "A person washes a plate.",
            "1": "A person washes a cup."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_167",
        "captions": {
            "2": "A person closed the cabinet.",
            "3": "A person closed the oven.",
            "1": "A person closed the refrigerator."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_168",
        "captions": {
            "2": "A person lies on a bed.",
            "3": "A person lies on a carpet.",
            "1": "A person lies on a couch."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_169",
        "captions": {
            "2": "A person sitting at a bench.",
            "3": "A person sitting at a sofa.",
            "1": "A person sitting at a table."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_170",
        "captions": {
            "2": "The person puts down a sock.",
            "3": "The person puts down a jacket.",
            "1": "The person puts down a shoe."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_171",
        "captions": {
            "2": "A person taking a mop.",
            "3": "A person taking a fishing rod.",
            "1": "A person taking a broom."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_172",
        "captions": {
            "2": "A person eating candy.",
            "3": "A person eating a sandwich.",
            "1": "A person eating medicine."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_173",
        "captions": {
            "2": "A person taking a blanket.",
            "3": "A person taking a jacket.",
            "1": "A person taking a towel."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_174",
        "captions": {
            "2": "A person puts down a suitcase.",
            "3": "A person puts down a box.",
            "1": "A person puts down a bag."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_175",
        "captions": {
            "2": "A person sitting on a sofa.",
            "3": "A person sitting on a table.",
            "1": "A person sitting on a bed."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_176",
        "captions": {
            "2": "Person closing a book.",
            "3": "Person closing a briefcase.",
            "1": "Person closing a laptop."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_177",
        "captions": {
            "2": "Person stirring a pan.",
            "3": "Person stirring a mug.",
            "1": "Person stirring a pot."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_178",
        "captions": {
            "2": "Person takes off a sock",
            "3": "Person takes off a shirt.",
            "1": "Person takes off a shoe."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_179",
        "captions": {
            "2": "The person picked up the floormat.",
            "3": "The person picked up the curtain.",
            "1": "The person picked up the towel."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_180",
        "captions": {
            "2": "The person took the pillow.",
            "3": "The person took the bag.",
            "1": "The person took the bolster."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_181",
        "captions": {
            "2": "Person eating a burger.",
            "3": "Person eating a pizza slice.",
            "1": "Person eating a sandwich."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_182",
        "captions": {
            "2": "The person opens a box.",
            "3": "The person opens a drawer.",
            "1": "The person opens a bag."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_183",
        "captions": {
            "2": "A person picks up a painting.",
            "3": "A person picks up a mirror.",
            "1": "A person picks up a picture."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_184",
        "captions": {
            "2": "A person throws a cup.",
            "3": "A person throws a shoe.",
            "1": "A person throws food."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_185",
        "captions": {
            "2": "The person puts down the magazine.",
            "3": "The person puts down the bottle.",
            "1": "The person puts down the laptop."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_186",
        "captions": {
            "2": "A person puts down a bag.",
            "3": "A person puts down a vase.",
            "1": "A person puts down a box."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_187",
        "captions": {
            "2": "A person tidies up the chair.",
            "3": "A person tidies up the cabinet.",
            "1": "A person tidies up the table."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_188",
        "captions": {
            "2": "A person throws a pouch.",
            "3": "A person throws a pillow.",
            "1": "A person throws a bag."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_189",
        "captions": {
            "2": "A person takes a sock.",
            "3": "A person takes a helmet.",
            "1": "A person takes a shoe."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_190",
        "captions": {
            "2": "The person puts down a mop.",
            "3": "The person puts down a frying pan.",
            "1": "The person puts down a broom."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_191",
        "captions": {
            "2": "A person is cleaning a mirror.",
            "3": "A person is cleaning a painting.",
            "1": "A person is cleaning a window."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_192",
        "captions": {
            "2": "Person puts down a bedsheet.",
            "3": "Person puts down a pillow.",
            "1": "Person puts down a towel."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_193",
        "captions": {
            "2": "The person opens a door.",
            "3": "The person opens a cabinet.",
            "1": "The person opens a window."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_194",
        "captions": {
            "2": "The person wipes the window.",
            "3": "The person wipes the painting.",
            "1": "The person wipes the mirror."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_195",
        "captions": {
            "2": "Person packing a burger.",
            "3": "Person packing a book.",
            "1": "Person packing a sandwich."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_196",
        "captions": {
            "2": "Person taking a tablet.",
            "3": "Person taking a typewriter.",
            "1": "Person taking a laptop."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_197",
        "captions": {
            "2": "A person opens a pantry.",
            "3": "A person opens a washing machine.",
            "1": "A person opens a refrigerator."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_198",
        "captions": {
            "2": "A person puts down a blanket.",
            "3": "A person puts down a shoe.",
            "1": "A person puts down a pillow."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_199",
        "captions": {
            "2": "Person taking a bag.",
            "3": "Person taking a book.",
            "1": "Person taking a box."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_200",
        "captions": {
            "2": "A person opens a tablet.",
            "3": "A person opens a book.",
            "1": "A person opens a laptop."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_201",
        "captions": {
            "2": "Person putting down a towel.",
            "3": "Person putting down a hat.",
            "1": "Person putting down clothes."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_202",
        "captions": {
            "2": "A person picks up a diary.",
            "3": "A person picks up a laptop.",
            "1": "A person picks up a manual."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_203",
        "captions": {
            "2": "The person closed the door.",
            "3": "The person closed the cupboard.",
            "1": "The person closed the window."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "object_204",
        "captions": {
            "2": "A person washing dishes.",
            "3": "A person washing toys.",
            "1": "A person washing clothes."
        },
        "dataset": "mvbench",
        "aspect": "object",
        "subaspect" : "object_recognition"
    },
    {
        "video": "action_1",
        "captions": {
            "2": "Person boiling water for tea.",
            "3": "Person spilling the tea.",
            "1": "Person preparing tea."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_2",
        "captions": {
            "2": "The person is drinking tea.",
            "3": "The person is cleaning the table.",
            "1": "The person is preparing a tea."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_3",
        "captions": {
            "2": "The person is inspecting the teacup.",
            "3": "The person is washing the teacup.",
            "1": "The person is preparing a tea."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_4",
        "captions": {
            "2": "Person cutting vegetables for a salad.",
            "3": "Person arranging fruits in a bowl.",
            "1": "Person preparing a salad."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_5",
        "captions": {
            "2": "The person is tossing a salad.",
            "3": "The person is arranging a bouquet.",
            "1": "The person is preparing a salad."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_6",
        "captions": {
            "2": "Person cutting the vegetables.",
            "3": "Person cleaning the vegetables.",
            "1": "Person preparing a salad."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_7",
        "captions": {
            "1": "The person is preparing a sandwich.",
            "2": "The person is slicing bread.",
            "3": "The person is grilling a sandwich."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_8",
        "captions": {
            "1": "A person preparing a sandwich.",
            "2": "A person slicing vegetables.",
            "3": "A person balancing a stack of plates."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_9",
        "captions": {
            "2": "A person spreading butter on a slice of bread.",
            "3": "A person cutting vegetables next to a slice of bread.",
            "1": "A person preparing a sandwich."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_10",
        "captions": {
            "1": "A person prepares a sandwich.",
            "2": "A person spreads butter on a slice of bread.",
            "3": "A person paints a design on a sandwich with sauce."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_11",
        "captions": {
            "2": "A person slicing vegetables.",
            "3": "A person stacking books.",
            "1": "A person preparing a sandwich."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_12",
        "captions": {
            "2": "Person chopping eggs.",
            "3": "Person tossing eggs.",
            "1": "Person preparing eggs."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_13",
        "captions": {
            "1": "A person is preparing eggs.",
            "2": "A person is dropping eggs.",
            "3": "A person is refrigerating eggs."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_14",
        "captions": {
            "1": "A person is preparing eggs.",
            "2": "A person is frying eggs.",
            "3": "A person is eating eggs."
        },
        "dataset": "perception_test",
        "aspect": "action"
    },
    {
        "video": "action_15",
        "captions": {
            "2": "A cat is drying off.",
            "3": "A cat is drinking water.",
            "1": "A cat is bathing."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_16",
        "captions": {
            "2": "People are running from a fire.",
            "3": "People are starting a fire.",
            "1": "People extinguishing a fire."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_17",
        "captions": {
            "2": "A couple is going for a walk in the park.",
            "3": "A couple is planting a tree together.",
            "1": "A couple is getting married."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_18",
        "captions": {
            "2": "Two dogs are biting something.",
            "3": "Two dogs are barking at something.",
            "1": "Two dogs are chasing something."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_19",
        "captions": {
            "2": "Person moving an object.",
            "3": "Person balancing an object.",
            "1": "Person poking an object."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_20",
        "captions": {
            "2": "A person is trimming a plant.",
            "3": "A person is decorating a plant.",
            "1": "A person is watering a plant."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_21",
        "captions": {
            "1": "A person is baptizing someone.",
            "2": "A person is washing someone's feet.",
            "3": "A person is anointing someone with oil."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_22",
        "captions": {
            "2": "A person is writing.",
            "3": "A person is painting.",
            "1": "A person is reading."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_23",
        "captions": {
            "2": "People rowing on the water.",
            "3": "People swimming in the water.",
            "1": "People sailing on the water."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_24",
        "captions": {
            "2": "A man is laughing.",
            "3": "A man is typing.",
            "1": "A man is discussing."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_25",
        "captions": {
            "2": "People are cheering energetically.",
            "3": "People are jumping energetically.",
            "1": "People clapping energetically."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_26",
        "captions": {
            "2": "Two people are hugging.",
            "3": "Two people are arguing.",
            "1": "Two people are kissing."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_27",
        "captions": {
            "2": "Climbers hiking a forest trail.",
            "3": "Climbers having a picnic in a park.",
            "1": "Climbers scaling a rock face."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_28",
        "captions": {
            "2": "A person singing to the audience.",
            "3": "A person dancing to the audience.",
            "1": "A person preaching to the audience."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_29",
        "captions": {
            "2": "A lady ordering food at a restaurant.",
            "3": "A lady eating at a restaurant.",
            "1": "A lady waiting at a restaurant."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_30",
        "captions": {
            "2": "A person is blinking.",
            "3": "A person is rolling their eyes.",
            "1": "A person is winking."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_31",
        "captions": {
            "2": "The video depicts the activity of sorting.",
            "3": "The video depicts the activity of disassembling.",
            "1": "The video depicts the activity of stacking."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_32",
        "captions": {
            "2": "Cat sleeping on a toy.",
            "3": "Cat drinking water near a toy.",
            "1": "Cat clawing at a toy."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_33",
        "captions": {
            "2": "A person is dribbling a football.",
            "3": "A person is throwing a football.",
            "1": "A person is kicking a football."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_34",
        "captions": {
            "2": "The video depicts a soldier patrolling.",
            "3": "The video depicts a soldier resting.",
            "1": "The video depicts a soldier guarding."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_35",
        "captions": {
            "2": "A person is stamping documents.",
            "3": "A person is typing documents.",
            "1": "A person is shredding documents."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_36",
        "captions": {
            "2": "A person is dribbling a ball.",
            "3": "A person is juggling balls.",
            "1": "A person is pitching a ball."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_37",
        "captions": {
            "2": "A robot is rotating on the table.",
            "3": "A robot is idling on the table.",
            "1": "A robot is moving on the table."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_38",
        "captions": {
            "2": "Two people shaking hands in a warm greeting.",
            "3": "Two people doing a playful dance.",
            "1": "Two people embracing in a warm hug."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_39",
        "captions": {
            "2": "Boiling food in an outdoor barbecue.",
            "3": "Washing utensils near an outdoor barbecue.",
            "1": "Grilling food on an outdoor barbecue."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_40",
        "captions": {
            "2": "People singing to music.",
            "3": "People dancing to music.",
            "1": "People playing music."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_41",
        "captions": {
            "2": "Sheep are jumping over a fence.",
            "3": "Sheep are grazing in a pasture.",
            "1": "Sheep are running in a field."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_42",
        "captions": {
            "2": "Students discussing topics in a library.",
            "3": "Students playing chess in a library.",
            "1": "Students studying together in a library."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_43",
        "captions": {
            "2": "The video shows the process of shaving.",
            "3": "The video shows the process of painting.",
            "1": "The video shows the process of waxing."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_44",
        "captions": {
            "2": "Person waving on the roadside.",
            "3": "Person dancing on the roadside.",
            "1": "Person hitchhiking on the roadside."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_45",
        "captions": {
            "2": "People waving.",
            "3": "People singing.",
            "1": "People clapping."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_46",
        "captions": {
            "2": "A person is grilling food on a stove.",
            "3": "A person is decorating food on a plate.",
            "1": "A person is frying food in a pan."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_47",
        "captions": {
            "1": "A person is swimming in the video.",
            "2": "A person is rowing in the video.",
            "3": "A person is fishing in the video."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_48",
        "captions": {
            "2": "A person is mixing ingredients.",
            "3": "A person is drawing.",
            "1": "A person is baking."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_49",
        "captions": {
            "2": "A floating jellyfish.",
            "3": "A floating paper.",
            "1": "A floating bubble."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_50",
        "captions": {
            "2": "Breaking a piece of bamboo.",
            "3": "Painting a piece of bamboo.",
            "1": "Sawing a piece of bamboo."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_51",
        "captions": {
            "2": "Someone is folding an item of clothing.",
            "3": "Someone is ironing an item of clothing.",
            "1": "Someone is buttoning an item of clothing."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_52",
        "captions": {
            "2": "People are clapping.",
            "3": "People are drawing.",
            "1": "People are dancing."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_53",
        "captions": {
            "2": "Someone is hammering.",
            "3": "Someone is painting.",
            "1": "Someone is welding."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_54",
        "captions": {
            "2": "A person is patting another individual.",
            "3": "A person is teaching another individual.",
            "1": "A person is baptizing another individual."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_55",
        "captions": {
            "2": "Painting technique demonstration.",
            "3": "Exercise movement demonstration.",
            "1": "Assembling process demonstration."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_56",
        "captions": {
            "2": "Someone leans forward from a seated position.",
            "3": "Someone jumps up from a seated position.",
            "1": "Someone stands up from a seated position."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_57",
        "captions": {
            "2": "A person examines an object.",
            "3": "A person throws an object.",
            "1": "A person picks up an object."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_58",
        "captions": {
            "2": "Person runs forward.",
            "3": "Person dances in place.",
            "1": "Person jumps up."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_59",
        "captions": {
            "2": "A subject is depicted skipping.",
            "3": "A subject is depicted dancing.",
            "1": "A subject is depicted hopping."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_60",
        "captions": {
            "2": "A person bending down.",
            "3": "A person spinning around.",
            "1": "A person jumping up."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_61",
        "captions": {
            "2": "Someone is kneeling down.",
            "3": "Someone is lying down.",
            "1": "Someone is squatting down."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_62",
        "captions": {
            "2": "Someone throwing an object.",
            "3": "Someone balancing an object.",
            "1": "Someone dropping an object."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_63",
        "captions": {
            "2": "A person jumping off.",
            "3": "A person sitting down.",
            "1": "A person standing up."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_64",
        "captions": {
            "2": "Someone is placing down an object.",
            "3": "Someone is throwing an object.",
            "1": "Someone is picking up an object."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_65",
        "captions": {
            "2": "A person is sitting down.",
            "3": "A person is balancing on one leg.",
            "1": "A person squatting down."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_66",
        "captions": {
            "2": "Someone stands up.",
            "3": "Someone lies down.",
            "1": "Someone sits down."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_67",
        "captions": {
            "2": "A subject is seen jogging in the video.",
            "3": "A subject is seen skipping in the video.",
            "1": "A subject is seen hopping in the video."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_68",
        "captions": {
            "2": "Person hops.",
            "3": "Person crouches down.",
            "1": "Person jumps up."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_69",
        "captions": {
            "2": "Object being thrown across the room.",
            "3": "Object being lifted.",
            "1": "Object being dropped."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_70",
        "captions": {
            "2": "A person stands up.",
            "3": "A person lies down.",
            "1": "A person sits down."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_71",
        "captions": {
            "2": "The person bends down.",
            "3": "The person lies down.",
            "1": "The person squats down."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_72",
        "captions": {
            "2": "Someone claps their hands together.",
            "3": "Someone waves their hands in the air.",
            "1": "Someone rubs their two hands together."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_73",
        "captions": {
            "2": "A person is waving.",
            "3": "A person is jumping.",
            "1": "A person is clapping."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_74",
        "captions": {
            "2": "A person is shaking their hand.",
            "3": "A person is massaging their hand.",
            "1": "A person is waving their hand."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_75",
        "captions": {
            "2": "Person claps their hands together.",
            "3": "Person waves their hands in the air.",
            "1": "Person puts their palms together."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_76",
        "captions": {
            "2": "A person is clapping their hands together.",
            "3": "A person is waving their hands in the air.",
            "1": "A person rubs their two hands together."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_77",
        "captions": {
            "2": "A person is clapping their hands.",
            "3": "A person is throwing an object.",
            "1": "A person is waving their hand."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_78",
        "captions": {
            "2": "People clapping their hands together.",
            "3": "People high-fiving each other repeatedly.",
            "1": "People putting their palms together."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_79",
        "captions": {
            "2": "A person claps their hands together.",
            "3": "A person waves their hands in the air.",
            "1": "A person puts their palms together."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_80",
        "captions": {
            "2": "The person waves their hands in front.",
            "3": "The person claps their hands in front.",
            "1": "The person crosses their hands in front."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_81",
        "captions": {
            "2": "The video depicts someone stretching their arms.",
            "3": "The video depicts someone juggling with their arms.",
            "1": "The video depicts someone doing arm circles."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_82",
        "captions": {
            "2": "People performing arm curls.",
            "3": "People performing handstands.",
            "1": "People performing arm swings."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_83",
        "captions": {
            "2": "The video depicts people waving their arms.",
            "3": "The video depicts people juggling with their arms.",
            "1": "Caption: The video depicts people crossing their arms."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_84",
        "captions": {
            "2": "Person performing leg swings.",
            "3": "Person performing arm curls.",
            "1": "Person performing arm swings."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_85",
        "captions": {
            "2": "Someone stretches their arms.",
            "3": "Someone waves their arms.",
            "1": "Someone crosses their arms."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_86",
        "captions": {
            "2": "A person raising their hands in front.",
            "3": "A person clapping their hands in front.",
            "1": "A person crossing their hands in front."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_87",
        "captions": {
            "2": "People performing jumping jacks.",
            "3": "People performing handstands.",
            "1": "People performing arm circles."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_88",
        "captions": {
            "2": "The person in the video waves their arms.",
            "3": "The person in the video twirls their arms.",
            "1": "The person in the video crosses their arms."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_89",
        "captions": {
            "2": "A person raises their hands in front.",
            "3": "A person waves their hands in front.",
            "1": "A person crosses their hands in front."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_90",
        "captions": {
            "2": "A person demonstrates a front kick.",
            "3": "A person demonstrates a spinning kick.",
            "1": "A person demonstrates a side kick."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_91",
        "captions": {
            "2": "A person stretching on the spot.",
            "3": "A person dancing on the spot.",
            "1": "A person running on the spot."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_92",
        "captions": {
            "2": "The video depicts a front kick.",
            "3": "The video depicts a person punching.",
            "1": "The video depicts a side kick."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_93",
        "captions": {
            "2": "Person jogging in place.",
            "3": "Person dancing on the spot.",
            "1": "Person running on the spot."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_94",
        "captions": {
            "2": "People are jogging in place.",
            "3": "People are hopping on one foot.",
            "1": "People performing butt kicks."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_95",
        "captions": {
            "2": "Someone is jumping on the spot.",
            "3": "Someone is dancing on the spot.",
            "1": "Someone is running on the spot."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_96",
        "captions": {
            "2": "Person throwing an object.",
            "3": "Person painting an object.",
            "1": "Person kicking an object."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_97",
        "captions": {
            "2": "A person throws a punch.",
            "3": "A person performs a backflip.",
            "1": "A person performs a side kick."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_98",
        "captions": {
            "2": "A person nods their head.",
            "3": "A person spins around.",
            "1": "A person shakes their head."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_99",
        "captions": {
            "1": "Person saluting.",
            "2": "Person waving.",
            "3": "Person dancing."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_100",
        "captions": {
            "2": "A person washes their face.",
            "3": "A person shaves their face.",
            "1": "A person wipes their face."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_101",
        "captions": {
            "2": "Person nodding their head.",
            "3": "Person tilting their head sideways.",
            "1": "Person shaking their head."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_102",
        "captions": {
            "2": "A person waving their hand.",
            "3": "A person clapping their hands.",
            "1": "A person performing a salute."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_103",
        "captions": {
            "2": "The video depicts someone nodding their head.",
            "3": "The video depicts someone patting their head.",
            "1": "The video depicts someone shaking their head."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_104",
        "captions": {
            "2": "Someone brushes their hair.",
            "3": "Someone checks their watch.",
            "1": "Someone wipes their face."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_105",
        "captions": {
            "1": "A person wipes their face.",
            "2": "A person scratches their face.",
            "3": "A person draws on their face."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_106",
        "captions": {
            "2": "A person gives a high five.",
            "3": "A person waves both hands.",
            "1": "A person gives a thumbs down."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_107",
        "captions": {
            "2": "The person waves their hand.",
            "3": "The person claps their hands.",
            "1": "The person makes a victory sign."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_108",
        "captions": {
            "2": "The person waves their hand.",
            "3": "The person folds their arms.",
            "1": "The person gives a thumbs up."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_109",
        "captions": {
            "2": "Person waving to someone.",
            "3": "Person playing an air guitar.",
            "1": "Person making an OK sign."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_110",
        "captions": {
            "2": "A person raising a fist.",
            "3": "A person pointing at something.",
            "1": "A person giving a thumbs down gesture."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_111",
        "captions": {
            "2": "Person waving.",
            "3": "Person clapping.",
            "1": "Person making OK sign."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_112",
        "captions": {
            "2": "The video shows someone waving.",
            "3": "The video shows someone clapping.",
            "1": "The video shows someone giving a thumbs up."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_113",
        "captions": {
            "2": "A person waving their hand.",
            "3": "A person juggling some objects.",
            "1": "A person giving a thumbs up."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_114",
        "captions": {
            "2": "Someone waves.",
            "3": "Someone claps.",
            "1": "Someone makes an OK sign."
        },
        "dataset": "mvbench",
        "aspect": "action"
    },
    {
        "video": "action_115",
        "captions": {
            "2": "A man is dribbling a basketball.",
            "3": "A man is sitting near a basketball hoop.",
            "1": "A man is dunking a basketball."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_116",
        "captions": {
            "2": "A woman is dancing and clapping.",
            "3": "A woman is dancing and juggling.",
            "1": "A woman is dancing and singing."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_117",
        "captions": {
            "2": "The man is dribbling basketball.",
            "3": "The man is bouncing basketball off the wall.",
            "1": "The man is shooting basketball."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_118",
        "captions": {
            "2": "People dancing on the ice.",
            "3": "People sledding across the ice.",
            "1": "People ice skating."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_119",
        "captions": {
            "2": "A monkey is climbing a tree.",
            "3": "A monkey is swimming in a pond.",
            "1": "A monkey is engaged in a fight."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_120",
        "captions": {
            "2": "The man is adjusting the bow in an archery activity.",
            "3": "The man is juggling arrows in an archery activity.",
            "1": "The man is shooting an arrow in an archery activity."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_121",
        "captions": {
            "2": "Woman athletes stretching.",
            "3": "Woman athletes resting.",
            "1": "Woman athletes running."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_122",
        "captions": {
            "2": "The person is rolling dough.",
            "3": "The person is throwing dough.",
            "1": "The person is kneading dough."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_123",
        "captions": {
            "2": "A person is sitting on the ladder.",
            "3": "A person is dancing on the ladder.",
            "1": "A person is climbing down a ladder."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_124",
        "captions": {
            "2": "Children are observing chemical reactions.",
            "3": "Children are spilling chemicals.",
            "1": "Children performing chemical experiments."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_125",
        "captions": {
            "2": "A car is parked on the road.",
            "3": "A car is being washed on the road.",
            "1": "A car is driving on the road."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_126",
        "captions": {
            "2": "People are setting up the badminton net.",
            "3": "People are dancing on the badminton court.",
            "1": "People are playing badminton."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_127",
        "captions": {
            "2": "A woman is writing in a journal while taking a bath.",
            "3": "A woman is painting her nails while taking a bath.",
            "1": "A woman reading a book while taking a bath."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_128",
        "captions": {
            "2": "Zebras walking",
            "3": "Zebras sleeping",
            "1": "Zebras running"
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_129",
        "captions": {
            "2": "Elephants are walking and drinking.",
            "3": "Elephants are playing with mud.",
            "1": "Elephants are eating and drinking."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_130",
        "captions": {
            "2": "A bear is rubbing against a tree.",
            "3": "A bear is climbing up a tree.",
            "1": "A bear is scratching against a tree."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_131",
        "captions": {
            "2": "A dog is sitting in a car.",
            "3": "A dog is washing a car.",
            "1": "A dog is driving a car."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_132",
        "captions": {
            "2": "Robots harvesting crops in the field.",
            "3": "Robots watering seedlings in the field.",
            "1": "Robots caring for seedlings in the field."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_133",
        "captions": {
            "2": "A woman is slicing the salad.",
            "3": "A woman is feeding salad to a bird.",
            "1": "A woman is eating salad."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_134",
        "captions": {
            "2": "Person cleaning an electric vehicle.",
            "3": "Person painting an electric vehicle.",
            "1": "Person recharging an electric vehicle."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_135",
        "captions": {
            "2": "The man and woman are swinging kettlebells.",
            "3": "The man and woman are juggling kettlebells.",
            "1": "The man and woman are lifting kettlebells."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_136",
        "captions": {
            "2": "Cat is scratching its paw.",
            "3": "Cat is pushing a toy with its paw.",
            "1": "Cat licking its paw."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_137",
        "captions": {
            "2": "Person mixing water in a glass.",
            "3": "Person spilling water all over the table.",
            "1": "Person pouring water into a glass."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_138",
        "captions": {
            "2": "The chef is drizzling lemon juice on the steak.",
            "3": "The chef is slicing the steak.",
            "1": "The chef is sprinkling salt on the steak."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_139",
        "captions": {
            "2": "Dog sitting beside a woman.",
            "3": "Dog jumping over a woman.",
            "1": "Dog giving a high five to a woman."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_140",
        "captions": {
            "1": "A person squeezes out toothpaste onto a toothbrush.",
            "2": "A person is rinsing the toothbrush.",
            "3": "A person is painting with the toothbrush."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_141",
        "captions": {
            "2": "Armored vehicles reversing.",
            "3": "Armored vehicles being repaired.",
            "1": "Armored vehicles driving."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_142",
        "captions": {
            "2": "Knight sharpening sword with a stone.",
            "3": "Knight polishing sword under a tree.",
            "1": "Knight drawing sword from sheath."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_143",
        "captions": {
            "2": "A woman rehearses a ballet routine.",
            "3": "A woman paints a canvas with dynamic strokes.",
            "1": "A woman performs a freestyle dance."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_144",
        "captions": {
            "2": "The athletic man is adjusting his prosthetic running blade.",
            "3": "The athletic man is polishing his prosthetic running blade.",
            "1": "The athletic man is taking off his prosthetic running blade."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_145",
        "captions": {
            "2": "A woman skips across the pebble creek.",
            "3": "A woman balances on rocks in the pebble creek.",
            "1": "A woman walks across the pebble creek."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_146",
        "captions": {
            "2": "The person is tapping the button and rolling the wheel on a mouse.",
            "3": "The person is clicking the button and dragging the mouse across the table.",
            "1": "The person is clicking the button and rolling the wheel on a mouse."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_147",
        "captions": {
            "2": "A woman is trimming a bouquet of daffodils.",
            "3": "A woman is wrapping a bouquet of daffodils.",
            "1": "A woman is arranging a bouquet of daffodils."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_148",
        "captions": {
            "2": "The woman is folding a towel on her neck.",
            "3": "The woman is balancing a towel on her neck.",
            "1": "The woman is putting a towel on her neck."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_149",
        "captions": {
            "2": "A girl spins around while playing the saxophone.",
            "3": "A girl stands still while playing the saxophone.",
            "1": "A girl tango dances while playing the saxophone."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_150",
        "captions": {
            "2": "Doctor examining the patient.",
            "3": "Doctor singing to the patient.",
            "1": "Doctor giving medicine to a patient."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_151",
        "captions": {
            "2": "Researcher showing children how to examine a frog.",
            "3": "Researcher demonstrating to children how to paint a frog.",
            "1": "Researcher teaching children how to dissect a frog."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_152",
        "captions": {
            "2": "A hand is writing notes in the book.",
            "3": "A hand is folding the pages of a book.",
            "1": "A hand is turning over pages of a book."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_153",
        "captions": {
            "2": "A man is rolling a pizza dough.",
            "3": "A man is tossing a pizza dough in the air.",
            "1": "A man is stretching a pizza dough."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_154",
        "captions": {
            "2": "The woman is waving her hand.",
            "3": "The woman is fixing her hair.",
            "1": "The woman is doing an invitation gesture."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_155",
        "captions": {
            "2": "A man is breaking the balls on the pool table.",
            "3": "A man is cleaning the pool table.",
            "1": "A man is playing pool."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_156",
        "captions": {
            "2": "A man is tapping the computer screen.",
            "3": "A man is yelling at the computer screen.",
            "1": "A man is attempting to punch a computer screen."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_157",
        "captions": {
            "2": "The man is roasting cocoa seeds.",
            "3": "The man is arranging cocoa seeds in patterns.",
            "1": "The man is peeling cocoa seeds."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_158",
        "captions": {
            "2": "A man is painting on a white sneaker.",
            "3": "A man is polishing a white sneaker.",
            "1": "A man is drawing on a white sneaker."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_159",
        "captions": {
            "2": "Woman performing alternating side plank.",
            "3": "Woman performing yoga poses.",
            "1": "Woman performing alternating leg lift plank."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_160",
        "captions": {
            "2": "Woman practising her kicks.",
            "3": "Woman performing a dance routine.",
            "1": "Woman practising her punches."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_161",
        "captions": {
            "2": "Doctor measuring the woman's blood pressure.",
            "3": "Doctor organizing medical supplies.",
            "1": "Doctor giving an injection to the woman."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_162",
        "captions": {
            "2": "A man is cutting a pizza.",
            "3": "A man is assembling a pizza box.",
            "1": "A man is opening a pizza box."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_163",
        "captions": {
            "2": "Stirring sauce into a bowl of vegetables.",
            "3": "Pouring juice into a bowl of vegetables.",
            "1": "Squeezing sauce into a bowl of vegetables."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_164",
        "captions": {
            "2": "Man lighting a cigarette.",
            "3": "Man throwing a cigarette.",
            "1": "Man smoking a cigarette."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_165",
        "captions": {
            "2": "Man tuning an instrument.",
            "3": "Man packing up an instrument.",
            "1": "Man playing an instrument."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_166",
        "captions": {
            "2": "A woman is typing on her tablet.",
            "3": "A woman is fixing her tablet.",
            "1": "A woman is playing on her tablet."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_167",
        "captions": {
            "2": "A woman is rocking a baby.",
            "3": "A woman is tucking in a baby.",
            "1": "A woman is patting a baby."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_168",
        "captions": {
            "2": "Two men are sparring.",
            "3": "Two men are playing chess.",
            "1": "Two men are boxing."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_169",
        "captions": {
            "2": "A girl is dancing to a man playing ukulele.",
            "3": "A girl is whistling for a man playing ukulele.",
            "1": "A girl is clapping hands for a man playing ukulele."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_170",
        "captions": {
            "2": "Parents and daughter watching a movie together.",
            "3": "Parents and daughter painting together.",
            "1": "Parents and daughter playing video games together."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_171",
        "captions": {
            "1": "Cat eating food on the ground.",
            "2": "Cat sniffing food on the ground.",
            "3": "Cat playing with food on the ground."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_172",
        "captions": {
            "1": "Passengers are taking a seat on the subway.",
            "2": "Passengers are standing in the subway.",
            "3": "Passengers are sleeping in the subway."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_173",
        "captions": {
            "2": "People riding bikes up a hill.",
            "3": "People repairing bikes on the hill.",
            "1": "People pushing bikes up a hill."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_174",
        "captions": {
            "2": "A man is rinsing the car.",
            "3": "A man is waxing the car.",
            "1": "A man is washing the car."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_175",
        "captions": {
            "2": "A person measuring a cutting line on a wood surface.",
            "3": "A person painting a line on a wood surface.",
            "1": "A person drawing a cutting line on a wood surface."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_176",
        "captions": {
            "2": "A person is frosting cakes.",
            "3": "A person is painting cakes.",
            "1": "A person is decorating cakes."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_177",
        "captions": {
            "2": "A man is checking the water quality of a swimming pool.",
            "3": "A man is fixing a ladder near the swimming pool.",
            "1": "A man is cleaning a swimming pool."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_178",
        "captions": {
            "2": "Woman aiming in VR shooting game.",
            "3": "Woman dancing in VR shooting game.",
            "1": "Woman playing VR shooting game."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_179",
        "captions": {
            "1": "A squirrel eating food.",
            "2": "A squirrel gathering food.",
            "3": "A squirrel playing with the food."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_180",
        "captions": {
            "2": "A little girl walks towards a woman and hugs her.",
            "3": "A little girl skips towards a woman and hugs her.",
            "1": "A little girl runs towards a woman and hugs her."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_181",
        "captions": {
            "2": "A scientist appears to be taking notes during the experiment.",
            "3": "A scientist appears to be conducting calculations while experimenting.",
            "1": "A scientist appears confused and frustrated during the experiment."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_182",
        "captions": {
            "2": "A man anxiously raises his arm to rub his forehead.",
            "3": "A man anxiously raises his arm to adjust his glasses.",
            "1": "A man anxiously raises his arm to look at his watch."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "action_183",
        "captions": {
            "2": "A little child is splashing in the water.",
            "3": "A little child is swimming in the water.",
            "1": "A little child is water jumping."
        },
        "dataset": "tempcompass",
        "aspect": "action"
    },
    {
        "video": "direction_1",
        "captions": {
            "2": "A cyan sphere is rolling slightly to the left.",
            "3": "A cyan sphere is rolling upwards to the left.",
            "1": "A stationary cyan sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_2",
        "captions": {
            "2": "A gray cylinder moves down and to the left.",
            "3": "A gray cylinder moves up and to the left.",
            "1": "A gray cylinder moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_3",
        "captions": {
            "2": "A brown cylinder moves down and to the right.",
            "3": "A brown cylinder moves up and to the right.",
            "1": "A brown cylinder moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_4",
        "captions": {
            "2": "A blue cube moves down and to the right.",
            "3": "A blue cube moves up and to the right.",
            "1": "A blue cube moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_5",
        "captions": {
            "2": "The red sphere rolls slightly to the left.",
            "3": "The red sphere rolls forward to the right.",
            "1": "A stationary red sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_6",
        "captions": {
            "2": "A gray sphere rolls slowly to the left.",
            "3": "A gray sphere rolls up to the left.",
            "1": "A gray sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_7",
        "captions": {
            "2": "A cyan sphere rolls slightly to the left.",
            "3": "A cyan sphere rolls to the left.",
            "1": "A stationary cyan sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_8",
        "captions": {
            "2": "A brown sphere moves up and to the right.",
            "3": "A brown sphere moves down and to the right.",
            "1": "A brown sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_9",
        "captions": {
            "2": "The red sphere moves slightly to the left in the middle of the video.",
            "3": "The red sphere rolls in a circle during the video.",
            "1": "The red sphere remains stationary throughout the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_10",
        "captions": {
            "2": "Yellow sphere moves up and to the left.",
            "3": "Yellow sphere moves down and to the left.",
            "1": "Yellow sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_11",
        "captions": {
            "2": "A yellow cylinder moves down and to the left.",
            "3": "A yellow cylinder moves up and to the left.",
            "1": "A yellow cylinder moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_12",
        "captions": {
            "2": "A blue sphere moves slightly to the left.",
            "3": "A blue sphere rolls backward.",
            "1": "A stationary blue sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_13",
        "captions": {
            "2": "A brown sphere moves up and to the right.",
            "3": "A brown sphere moves down and to the right.",
            "1": "A brown sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_14",
        "captions": {
            "2": "A green sphere moves up and to the left.",
            "3": "A green sphere moves down and to the left.",
            "1": "A green sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_15",
        "captions": {
            "2": "A blue sphere starts to roll gently forward.",
            "3": "A blue sphere bounces forward.",
            "1": "A stationary blue sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_16",
        "captions": {
            "2": "A purple sphere moves down and to the left.",
            "3": "A purple sphere moves down and to the right.",
            "1": "A purple sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_17",
        "captions": {
            "2": "A red sphere moves down and to the right.",
            "3": "A red sphere moves up and to the right.",
            "1": "A red sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_18",
        "captions": {
            "2": "A yellow sphere moves up and to the right.",
            "3": "A yellow sphere moves down and to the right.",
            "1": "A yellow sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_19",
        "captions": {
            "2": "The purple cube moves down and to the left.",
            "3": "The purple cube moves up and to the left.",
            "1": "The purple cube moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_20",
        "captions": {
            "2": "A gray sphere moves down and to the right.",
            "3": "A gray sphere moves up and to the right.",
            "1": "A gray sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_21",
        "captions": {
            "2": "The red sphere slowly rolls to the left in the video.",
            "3": "The red sphere bounces upwards and downwards in the video.",
            "1": "The red sphere remains stationary in the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_22",
        "captions": {
            "2": "A gray cube moves up and to the left.",
            "3": "A gray cube moves down and to the left.",
            "1": "A gray cube moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_23",
        "captions": {
            "2": "A green sphere moves up and to the left.",
            "3": "A green sphere moves down and to the left.",
            "1": "A green sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_24",
        "captions": {
            "2": "The blue cube slides slightly to the right.",
            "3": "The blue cube slides upward to the right.",
            "1": "The blue cube remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_25",
        "captions": {
            "2": "A green sphere moves down and to the right.",
            "3": "A green sphere moves up and to the right.",
            "1": "A green sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_26",
        "captions": {
            "2": "A purple sphere slightly moves to the left.",
            "3": "A purple sphere rolls forward.",
            "1": "A stationary purple sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_27",
        "captions": {
            "2": "A blue sphere moves down and to the left.",
            "3": "A blue sphere moves up and to the left.",
            "1": "A blue sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_28",
        "captions": {
            "2": "The gray sphere rolls slightly to the right in the video.",
            "3": "The gray sphere rolls downwards to the right in the video.",
            "1": "The gray sphere is stationary in the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_29",
        "captions": {
            "2": "A yellow sphere moves up and to the right.",
            "3": "A yellow sphere moves down and to the right.",
            "1": "A yellow sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_30",
        "captions": {
            "2": "The purple sphere is swaying slightly side to side.",
            "3": "The purple sphere is spinning in a circle.",
            "1": "The stationary purple sphere is not moving."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_31",
        "captions": {
            "2": "A green cylinder moves down and to the left.",
            "3": "A green cylinder moves up and to the left.",
            "1": "A green cylinder moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_32",
        "captions": {
            "2": "A purple sphere moves down and to the left.",
            "3": "A purple sphere moves up and to the left.",
            "1": "A purple sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_33",
        "captions": {
            "2": "A blue sphere moves down and to the left.",
            "3": "A blue sphere moves up and to the left.",
            "1": "A blue sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_34",
        "captions": {
            "2": "A red cube moves down and to the right.",
            "3": "A red cube moves up and to the right.",
            "1": "A red cube moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_35",
        "captions": {
            "2": "A red sphere moves down and to the right.",
            "3": "A red sphere moves up and to the right.",
            "1": "A red sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_36",
        "captions": {
            "2": "A green cylinder moves down and to the right.",
            "3": "A green cylinder moves up and to the right.",
            "1": "A green cylinder moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_37",
        "captions": {
            "2": "A green sphere rolls slightly to the right.",
            "3": "A green sphere rolls right, then downwards.",
            "1": "A green sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_38",
        "captions": {
            "2": "A red sphere slowly rolls to the right.",
            "3": "A red sphere rolls right and upwards.",
            "1": "A stationary red sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_39",
        "captions": {
            "2": "A cyan cube moves downward and to the left.",
            "3": "A cyan cube moves upward and to the left.",
            "1": "A cyan cube moves downward and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_40",
        "captions": {
            "2": "A gray sphere is moving up and to the right.",
            "3": "A gray sphere is moving down and to the right.",
            "1": "A gray sphere is moving up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_41",
        "captions": {
            "2": "A cyan sphere moves up and to the right.",
            "3": "A cyan sphere moves down and to the right.",
            "1": "A cyan sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_42",
        "captions": {
            "2": "A red sphere moves up and to the right.",
            "3": "A red sphere moves down and to the right.",
            "1": "A red sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_43",
        "captions": {
            "2": "The yellow cube moves down and to the left.",
            "3": "The yellow cube moves up and to the left.",
            "1": "The yellow cube moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_44",
        "captions": {
            "2": "A green cylinder moves down and to the left.",
            "3": "A green cylinder moves up and to the left.",
            "1": "A green cylinder moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_45",
        "captions": {
            "2": "A yellow sphere is moving to the left.",
            "3": "A yellow sphere is moving down and to the right.",
            "1": "A yellow sphere is moving up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_46",
        "captions": {
            "2": "A cyan sphere moves up and to the left.",
            "3": "A cyan sphere moves down and to the left.",
            "1": "A cyan sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_47",
        "captions": {
            "2": "A blue sphere rolls slightly to the right.",
            "3": "A blue sphere rolls to the right.",
            "1": "A stationary blue sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_48",
        "captions": {
            "2": "A yellow sphere moves up and to the right.",
            "3": "A yellow sphere moves down and to the right.",
            "1": "A yellow sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_49",
        "captions": {
            "2": "A blue sphere moves down and to the right.",
            "3": "A blue sphere moves up and to the right.",
            "1": "A blue sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_50",
        "captions": {
            "2": "A brown sphere moves down and to the right.",
            "3": "A brown sphere moves up and to the right.",
            "1": "A brown sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_51",
        "captions": {
            "2": "A red cylinder is moving down and to the right.",
            "3": "A red cylinder is moving up and to the right.",
            "1": "A red cylinder is moving down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_52",
        "captions": {
            "2": "The brown cube is moving left and then down.",
            "3": "The brown cube is moving up and to the left.",
            "1": "The brown cube is moving down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_53",
        "captions": {
            "2": "A yellow cube moves down and to the right.",
            "3": "A yellow cube moves up and to the right.",
            "1": "A yellow cube moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_54",
        "captions": {
            "2": "Gray sphere moving up and to the left.",
            "3": "Gray sphere moving down and to the left.",
            "1": "Gray sphere moving up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_55",
        "captions": {
            "2": "The brown sphere rolls slightly within the video.",
            "3": "The brown sphere bounces around within the video.",
            "1": "The brown sphere remains stationary in the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_56",
        "captions": {
            "2": "A yellow cube moves down and to the left.",
            "3": "A yellow cube moves up and to the left.",
            "1": "A yellow cube moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_57",
        "captions": {
            "2": "A brown cube moves straight down.",
            "3": "A brown cube moves up and to the right.",
            "1": "A brown cube moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_58",
        "captions": {
            "2": "A red sphere moves down and to the right.",
            "3": "A red sphere moves up and to the right.",
            "1": "A red sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_59",
        "captions": {
            "2": "A blue sphere moving up and to the left.",
            "3": "A blue sphere moving down and to the left.",
            "1": "A blue sphere moving up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_60",
        "captions": {
            "2": "A green sphere starts rolling slightly to the right.",
            "3": "A green sphere starts rolling towards the camera.",
            "1": "A stationary green sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_61",
        "captions": {
            "2": "A brown cylinder moves down and to the left.",
            "3": "A brown cylinder moves upwards and to the left.",
            "1": "A brown cylinder moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_62",
        "captions": {
            "2": "A yellow sphere moves down and to the right.",
            "3": "A yellow sphere moves upward and to the right.",
            "1": "A yellow sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_63",
        "captions": {
            "2": "A gray sphere moves down and to the right.",
            "3": "A gray sphere moves up and to the right.",
            "1": "A gray sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_64",
        "captions": {
            "2": "The cyan cube moves down and to the left.",
            "3": "The cyan cube moves up and to the left.",
            "1": "The cyan cube moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_65",
        "captions": {
            "2": "The purple sphere moves up and to the right.",
            "3": "The purple sphere moves down and to the right.",
            "1": "The purple sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_66",
        "captions": {
            "2": "A red sphere moves up and to the right.",
            "3": "A red sphere moves down and to the right.",
            "1": "A red sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_67",
        "captions": {
            "2": "A green sphere is moving straight to the right.",
            "3": "A green sphere is moving up and to the left.",
            "1": "A green sphere is moving down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_68",
        "captions": {
            "2": "Cyan sphere moving down and to the right.",
            "3": "Cyan sphere moving up and to the right.",
            "1": "Cyan sphere moving down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_69",
        "captions": {
            "2": "A red sphere moves to the right and slightly left.",
            "3": "A red sphere moves down and to the left.",
            "1": "A red sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_70",
        "captions": {
            "2": "A gray cube moves down and to the right.",
            "3": "A gray cube moves up and to the right.",
            "1": "A gray cube moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_71",
        "captions": {
            "2": "The green sphere rolls slightly to the left.",
            "3": "The green sphere rolls downwards to the left.",
            "1": "The green sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_72",
        "captions": {
            "2": "Red sphere moves up and to the right.",
            "3": "Red sphere moves down and to the right.",
            "1": "Red sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_73",
        "captions": {
            "2": "A yellow sphere is moving down and to the right.",
            "3": "A yellow sphere is moving up and to the right.",
            "1": "A yellow sphere is moving down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_74",
        "captions": {
            "2": "A green sphere moves slightly leftwards in the video.",
            "3": "A green sphere moves to the bottom-left corner of the video.",
            "1": "A stationary green sphere is shown in the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_75",
        "captions": {
            "2": "A purple sphere moves up and to the left.",
            "3": "A purple sphere moves down and to the left.",
            "1": "A purple sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_76",
        "captions": {
            "2": "The yellow sphere is slowly rolling to the right.",
            "3": "The yellow sphere is rolls to the right.",
            "1": "The yellow sphere is stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_77",
        "captions": {
            "2": "A yellow sphere moves down and to the right.",
            "3": "A yellow sphere moves up and to the right.",
            "1": "A yellow sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_78",
        "captions": {
            "2": "A blue sphere moves to the right.",
            "3": "A blue sphere moves down and to the left.",
            "1": "A blue sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_79",
        "captions": {
            "2": "A red sphere moves down and to the right.",
            "3": "A red sphere moves down and to the left.",
            "1": "A red sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_80",
        "captions": {
            "2": "A brown sphere moves up and to the right.",
            "3": "A brown sphere moves down and to the right.",
            "1": "A brown sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_81",
        "captions": {
            "2": "The red sphere moves left and then up.",
            "3": "The red sphere moves down and to the left.",
            "1": "The red sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_82",
        "captions": {
            "2": "A red sphere is displaced slightly to the right.",
            "3": "A red sphere is moves to the right in the video.",
            "1": "A stationary red sphere is in view."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_83",
        "captions": {
            "2": "A blue sphere is slowly moving from left to right.",
            "3": "A blue sphere is rolling in a zig-zag pattern from left to right.",
            "1": "A stationary blue sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_84",
        "captions": {
            "2": "A cyan sphere moves slightly towards the viewer.",
            "3": "A cyan sphere rolls rapidly towards the viewer.",
            "1": "A stationary cyan sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_85",
        "captions": {
            "2": "The brown sphere moves up and to the right.",
            "3": "The brown sphere moves down and to the right.",
            "1": "The brown sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_86",
        "captions": {
            "2": "A blue sphere moves down and to the right.",
            "3": "A blue sphere moves up and to the right.",
            "1": "A blue sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_87",
        "captions": {
            "2": "A purple sphere is gently rolling to the right.",
            "3": "A purple sphere is rolling upwards to the right.",
            "1": "A stationary purple sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_88",
        "captions": {
            "2": "The purple sphere moves down and to the left.",
            "3": "The purple sphere moves up and to the left.",
            "1": "The purple sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_89",
        "captions": {
            "2": "The yellow cylinder moves to the left.",
            "3": "The yellow cylinder moves up and to the right.",
            "1": "The yellow cylinder moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_90",
        "captions": {
            "2": "The yellow sphere is slowly drifting to the right.",
            "3": "The yellow sphere is slowly rolling towards the right and upwards.",
            "1": "The yellow sphere is stationary in the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_91",
        "captions": {
            "2": "A green sphere moves up and to the left.",
            "3": "A green sphere moves down and to the left.",
            "1": "A green sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_92",
        "captions": {
            "2": "A gray sphere moves down and to the left.",
            "3": "A gray sphere moves up and to the left.",
            "1": "A gray sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_93",
        "captions": {
            "2": "A yellow sphere is moving slightly to the right in the video.",
            "3": "A yellow sphere is moving downwards to the right in the video.",
            "1": "A stationary yellow sphere is featured in the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_94",
        "captions": {
            "2": "A purple sphere is slowly rolling leftwards in the video.",
            "3": "A purple sphere is slowly rollling leftwars and downwards in the video.",
            "1": "A stationary purple sphere is featured in the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_95",
        "captions": {
            "2": "A green sphere is moving straight down.",
            "3": "A green sphere is moving up and to the left.",
            "1": "A green sphere is moving down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_96",
        "captions": {
            "2": "A yellow cylinder is moving straight down.",
            "3": "A yellow cylinder is moving up and to the left.",
            "1": "A yellow cylinder is moving down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_97",
        "captions": {
            "2": "A red cylinder moves down and to the right.",
            "3": "A red cylinder moves up and to the right.",
            "1": "A red cylinder moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_98",
        "captions": {
            "2": "A red sphere moves down and to the left.",
            "3": "A red sphere moves up and to the left.",
            "1": "A red sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_99",
        "captions": {
            "2": "The cyan cylinder is moving down and to the left.",
            "3": "The cyan cylinder is moving up and to the left.",
            "1": "The cyan cylinder is moving down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_100",
        "captions": {
            "2": "A gray sphere moves slightly to the right.",
            "3": "A gray sphere moves backward and to the right slightly.",
            "1": "A gray sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_101",
        "captions": {
            "2": "A yellow cube moves down and to the right.",
            "3": "A yellow cube moves up and to the right.",
            "1": "A yellow cube moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_102",
        "captions": {
            "2": "A brown cylinder moves to the left.",
            "3": "A brown cylinder moves up and to the right.",
            "1": "A brown cylinder moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_103",
        "captions": {
            "2": "The blue cube is moving down and to the right.",
            "3": "The blue cube is moving up and to the right.",
            "1": "The blue cube is moving down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_104",
        "captions": {
            "2": "The gray sphere is rolling slightly to the right.",
            "3": "The gray sphere is rolling in a zigzag pattern.",
            "1": "The gray sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_105",
        "captions": {
            "2": "A blue sphere moves up and to the right.",
            "3": "A blue sphere moves down and to the right.",
            "1": "A blue sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_106",
        "captions": {
            "2": "The purple sphere moves up and to the left.",
            "3": "The purple sphere moves down and to the left.",
            "1": "The purple sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_107",
        "captions": {
            "2": "The gray sphere moves down and to the right.",
            "3": "The gray sphere moves upwards and to the right.",
            "1": "The gray sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_108",
        "captions": {
            "2": "A yellow sphere moves up and to the left.",
            "3": "A yellow sphere moves down and to the left.",
            "1": "A yellow sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_109",
        "captions": {
            "2": "A yellow sphere moves up and to the left.",
            "3": "A yellow sphere moves down and to the left.",
            "1": "A yellow sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_110",
        "captions": {
            "2": "The cyan sphere rolls slightly to the left.",
            "3": "The cyan sphere rolls slightly upwards to the left.",
            "1": "The cyan sphere is stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_111",
        "captions": {
            "2": "A gray sphere moves up and to the left.",
            "3": "A gray sphere moves down and to the left.",
            "1": "A gray sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_112",
        "captions": {
            "2": "The brown sphere is moving slowly to the left.",
            "3": "The brown sphere is moving slowly downwards to the left .",
            "1": "The brown sphere is stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_113",
        "captions": {
            "2": "A brown sphere rolls to the left in the video.",
            "3": "A brown sphere rolls up and to the left in the video.",
            "1": "A brown sphere remains stationary in the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_114",
        "captions": {
            "2": "A cyan cylinder moves downwards.",
            "3": "A cyan cylinder moves upwards and to the right.",
            "1": "A cyan cylinder moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_115",
        "captions": {
            "2": "The green sphere moves up and to the left.",
            "3": "The green sphere moves down and to the left.",
            "1": "The green sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_116",
        "captions": {
            "2": "The gray sphere moves slightly to the left.",
            "3": "The gray sphere moves slightly to the left and rightwards.",
            "1": "The gray sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_117",
        "captions": {
            "2": "A blue sphere moves down and to the left.",
            "3": "A blue sphere moves up and to the left.",
            "1": "A blue sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_118",
        "captions": {
            "2": "Cyan sphere moves down and to the right.",
            "3": "Cyan sphere moves up and to the right.",
            "1": "Cyan sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_119",
        "captions": {
            "2": "A red sphere moves up and to the left.",
            "3": "A red sphere moves down and to the left.",
            "1": "A red sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_120",
        "captions": {
            "2": "A gray cube moves down and to the right.",
            "3": "A gray cube moves up and to the right.",
            "1": "A gray cube moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_121",
        "captions": {
            "2": "A brown sphere rolls leftwards slightly.",
            "3": "A brown sphere rolls leftwards and downwards slightly.",
            "1": "A brown sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_122",
        "captions": {
            "2": "The brown sphere is slowly moving to the right throughout the video.",
            "3": "The brown sphere is is slowly moving in a circle throughout the video.",
            "1": "The brown sphere is stationary throughout the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_123",
        "captions": {
            "2": "A gray sphere moves up and to the left.",
            "3": "A gray sphere moves down and to the left.",
            "1": "A gray sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_124",
        "captions": {
            "2": "Cyan cylinder moves down and to the left.",
            "3": "Cyan cylinder moves up and to the left.",
            "1": "Cyan cylinder moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_125",
        "captions": {
            "2": "A brown sphere moves to the right.",
            "3": "A brown sphere moves down and to the left.",
            "1": "A brown sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_126",
        "captions": {
            "2": "The yellow sphere rolls slightly upwards.",
            "3": "The yellow sphere rolls slightly to the right, then upwards.",
            "1": "The yellow sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_127",
        "captions": {
            "2": "A red sphere rolls slightly downwards.",
            "3": "A red sphere rolls slightly downwards to the left.",
            "1": "A red sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_128",
        "captions": {
            "2": "A red sphere moves straight up.",
            "3": "A red sphere moves down and to the right.",
            "1": "A red sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_129",
        "captions": {
            "2": "A green sphere moves down and to the right.",
            "3": "A green sphere moves up and to the right.",
            "1": "A green sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_130",
        "captions": {
            "2": "A red sphere moves right and downwards.",
            "3": "A red sphere moves down and to the left.",
            "1": "A red sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_131",
        "captions": {
            "2": "A green sphere moves straight to the right.",
            "3": "A green sphere moves down and to the left.",
            "1": "A green sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_132",
        "captions": {
            "2": "A red cylinder moves down and to the right.",
            "3": "A red cylinder moves up and to the right.",
            "1": "A red cylinder moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_133",
        "captions": {
            "2": "A green sphere moves up and to the right.",
            "3": "A green sphere moves down and to the right.",
            "1": "A green sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_134",
        "captions": {
            "2": "The gray sphere moves up and to the left.",
            "3": "The gray sphere moves down and to the left.",
            "1": "The gray sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_135",
        "captions": {
            "2": "The yellow sphere moves up and to the right.",
            "3": "The yellow sphere moves down and to the right.",
            "1": "The yellow sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_136",
        "captions": {
            "2": "The green sphere is rolling to the left.",
            "3": "The green sphere is rolling diagonally to the left.",
            "1": "The green sphere is stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_137",
        "captions": {
            "2": "A yellow sphere moves down and to the left.",
            "3": "A yellow sphere moves up and to the left.",
            "1": "A yellow sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_138",
        "captions": {
            "2": "A brown sphere rolls to the right.",
            "3": "A brown sphere rolls diagonally to the right.",
            "1": "A brown sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_139",
        "captions": {
            "2": "A red sphere moves up and to the left.",
            "3": "A red sphere moves down and to the left.",
            "1": "A red sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_140",
        "captions": {
            "2": "A purple sphere moves up and to the right.",
            "3": "A purple sphere moves down and to the right.",
            "1": "A purple sphere moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_141",
        "captions": {
            "2": "The yellow sphere is moving slightly to the right.",
            "3": "The yellow sphere is moving slightly to the right diagonally.",
            "1": "The yellow sphere is stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_142",
        "captions": {
            "2": "A yellow sphere moves to the left.",
            "3": "A yellow sphere moves in a zigzag pattern.",
            "1": "A stationary yellow sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_143",
        "captions": {
            "2": "A cyan sphere moves up and to the left.",
            "3": "A cyan sphere moves down and to the left.",
            "1": "A cyan sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_144",
        "captions": {
            "2": "A green cube moves up and to the right.",
            "3": "A green cube moves down and to the right.",
            "1": "A green cube moves up and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_145",
        "captions": {
            "2": "A brown sphere moves rightward.",
            "3": "A brown sphere moves down and to the left.",
            "1": "A brown sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_146",
        "captions": {
            "2": "A purple sphere moves up and to the left.",
            "3": "A purple sphere moves down and to the left.",
            "1": "A purple sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_147",
        "captions": {
            "2": "A purple sphere rolls upwards.",
            "3": "A purple sphere rolls upwards to the left.",
            "1": "A stationary purple sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_148",
        "captions": {
            "2": "A red sphere moves down and to the right.",
            "3": "A red sphere moves up and to the right.",
            "1": "A red sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_149",
        "captions": {
            "2": "A purple sphere moves down and to the left.",
            "3": "A purple sphere moves up and to the left.",
            "1": "A purple sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_150",
        "captions": {
            "2": "A purple sphere gently shifts to the left.",
            "3": "A purple sphere bounces to the left.",
            "1": "A stationary purple sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_151",
        "captions": {
            "2": "A gray sphere moves horizontally to the right.",
            "3": "A gray sphere moves down and to the left.",
            "1": "A gray sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_152",
        "captions": {
            "2": "A brown sphere moves slightly towards the viewer.",
            "3": "A brown sphere moves slightly leftwards and towards the viewer.",
            "1": "A brown sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_153",
        "captions": {
            "2": "Green cube moves down and to the left.",
            "3": "Green cube moves up and to the left.",
            "1": "Green cube moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_154",
        "captions": {
            "2": "A cyan sphere moves down and to the right.",
            "3": "A cyan sphere moves up and to the right.",
            "1": "A cyan sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_155",
        "captions": {
            "2": "Red cube moves down and to the left.",
            "3": "Red cube moves up and to the left.",
            "1": "Red cube moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_156",
        "captions": {
            "2": "A blue sphere moves down and to the right.",
            "3": "A blue sphere moves up and to the right.",
            "1": "A blue sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_157",
        "captions": {
            "2": "A cyan sphere moves down and to the left.",
            "3": "A cyan sphere moves up and to the left.",
            "1": "A cyan sphere moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_158",
        "captions": {
            "2": "A brown sphere rolls gently to the right in the video.",
            "3": "A brown sphere bounces to the right in the video.",
            "1": "A brown sphere remains stationary in the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_159",
        "captions": {
            "2": "The yellow sphere moves slightly to the right during the video.",
            "3": "The yellow sphere rolls in a circle in the video.",
            "1": "The yellow sphere remains stationary throughout the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_160",
        "captions": {
            "2": "The red sphere slowly slides towards the right throughout the video.",
            "3": "The red sphere oscillates left and right throughout the video.",
            "1": "The red sphere remains stationary throughout the video."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_161",
        "captions": {
            "2": "The gray cylinder is moving down and to the right.",
            "3": "The gray cylinder is moving up and to the right.",
            "1": "The gray cylinder is moving down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_162",
        "captions": {
            "2": "The purple cube moves down and to the left.",
            "3": "The purple cube moves up and to the left.",
            "1": "The purple cube moves down and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_163",
        "captions": {
            "2": "A gray sphere moves down and to the right.",
            "3": "A gray sphere moves up and to the right.",
            "1": "A gray sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_164",
        "captions": {
            "2": "Green sphere moves down and to the right.",
            "3": "Green sphere moves up and to the right.",
            "1": "Green sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_165",
        "captions": {
            "2": "A blue sphere rolls forward slowly.",
            "3": "A blue sphere bounces forward.",
            "1": "A blue sphere remains stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_166",
        "captions": {
            "2": "A cyan sphere moves up and to the left.",
            "3": "A cyan sphere moves down and to the left.",
            "1": "A cyan sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_167",
        "captions": {
            "2": "The red cube moves to the left and then stops.",
            "3": "The red cube moves up and to the right.",
            "1": "The red cube moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_168",
        "captions": {
            "2": "The green sphere is slowly rolling to the left.",
            "3": "The green sphere is slowly rolling upwards and to the left.",
            "1": "The green sphere is stationary."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_169",
        "captions": {
            "2": "A gray sphere moves to the right.",
            "3": "A gray sphere moves down and to the left.",
            "1": "A gray sphere moves up and to the right."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_170",
        "captions": {
            "2": "A brown sphere slowly rolls to the right.",
            "3": "A brown sphere slowly rolls upward to the right.",
            "1": "A stationary brown sphere."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_171",
        "captions": {
            "2": "Gray sphere moving to the right and slightly down.",
            "3": "Gray sphere moving up and to the right.",
            "1": "Gray sphere moving down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_172",
        "captions": {
            "2": "A gray sphere moves down and to the right.",
            "3": "A gray sphere moves up and to the right.",
            "1": "A gray sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_173",
        "captions": {
            "2": "A blue sphere moves down and to the right.",
            "3": "A blue sphere moves up and to the right.",
            "1": "A blue sphere moves down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_174",
        "captions": {
            "2": "Red sphere moving down and to the right.",
            "3": "Red sphere moving up and to the right.",
            "1": "Red sphere moving down and to the left."
        },
        "dataset": "mvbench",
        "aspect": "direction"
    },
    {
        "video": "direction_175",
        "captions": {
            "2": "People gliding from left to right while skating.",
            "3": "People gliding away from the camera while skating.",
            "1": "People gliding towards the camera while skating."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_176",
        "captions": {
            "2": "The sun is setting.",
            "3": "The sun is moving across the sky horizontally.",
            "1": "The sun is rising."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_177",
        "captions": {
            "2": "The clock hands are moving counterclockwise.",
            "3": "The clock hands are remain stationary.",
            "1": "The clock hands are moving clockwise."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_178",
        "captions": {
            "2": "Football moving from right to left.",
            "3": "Football moving towards the screen.",
            "1": "Football moving from left to right."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_179",
        "captions": {
            "2": "Women athletes run diagonally from left to right.",
            "3": "Women athletes run from right to left across the screen.",
            "1": "Women athletes run from left to right across the screen."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_180",
        "captions": {
            "2": "Dolphins swimming along the shore.",
            "3": "Dolphins swimming away from the shore.",
            "1": "Dolphins swimming towards the shore."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_181",
        "captions": {
            "2": "Wind turbines rotating counterclockwise.",
            "3": "Wind turbines rotating in alternating directions.",
            "1": "Wind turbines rotating clockwise."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_182",
        "captions": {
            "2": "A 3D cloud is spinning and moving diagonally down-left from the camera's point of view.",
            "3": "A 3D cloud is spinning and moving downwards from the camera's point of view.",
            "1": "A 3D cloud is spinning and moving leftward from the camera's point of view."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_183",
        "captions": {
            "2": "The light is rotating counterclockwise.",
            "3": "The light is rotating upwards.",
            "1": "The light is rotating clockwise."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_184",
        "captions": {
            "2": "A tennis ball bouncing side to side.",
            "3": "A tennis ball bouncing in a zigzag pattern.",
            "1": "A tennis ball bouncing up and down."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_185",
        "captions": {
            "2": "The galaxy is spinning counterclockwise.",
            "3": "The galaxy is spinning in counterclockwise, then clockwise.",
            "1": "The galaxy is spinning clockwise."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_186",
        "captions": {
            "2": "Asteroids flying from left to right towards the camera.",
            "3": "Asteroids flying away from the camera.",
            "1": "Asteroids flying towards the camera."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_187",
        "captions": {
            "2": "The camera is zooming out from a 3D digital brain.",
            "3": "The camera is circling around a 3D digital brain.",
            "1": "The camera is zooming into a 3D digital brain."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_188",
        "captions": {
            "2": "The chef is putting the bread beside the burger.",
            "3": "The chef is putting the bread under the burger.",
            "1": "The chef is putting the bread on top of the burger."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_189",
        "captions": {
            "2": "The camera moves counterclockwise around the aircraft carrier.",
            "3": "The camera moves up and down the aircraft carrier.",
            "1": "The camera moves clockwise around the aircraft carrier."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_190",
        "captions": {
            "2": "The camera is panning to the side of a fighter jet.",
            "3": "The camera is panning to the front of a fighter jet.",
            "1": "The camera is panning to the back of a fighter jet."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_191",
        "captions": {
            "2": "Five stars appear from right to left.",
            "3": "Five stars appear from top to bottom.",
            "1": "Five stars appear from left to right."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_192",
        "captions": {
            "2": "Zebras moving away from the camera.",
            "3": "Zebras moving right to left.",
            "1": "Zebras moving from left to right."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_193",
        "captions": {
            "2": "Two rows of duck toys moving toward each other on a conveyor belt.",
            "3": "Two rows of duck toys moving in the same direction on a conveyor belt.",
            "1": "Two rows of duck toys moving in opposite directions on a conveyor belt."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_194",
        "captions": {
            "2": "A person is putting a charging gun on top of the car.",
            "3": "A person is unplugging a charging gun from the car.",
            "1": "A person is plugging a charging gun into the car."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_195",
        "captions": {
            "2": "The camera pans right to focus on three football players.",
            "3": "The camera zooms out from three football players.",
            "1": "The camera zooms in on three football players."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_196",
        "captions": {
            "2": "Jellyfish are floating downwards.",
            "3": "Jellyfish are floating in a circle.",
            "1": "Jellyfish are floating upwards."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_197",
        "captions": {
            "2": "A camera flies diagonally upwards among skyscraper buildings.",
            "3": "A camera flies downwards among skyscraper buildings.",
            "1": "A camera flies upwards among skyscraper buildings."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_198",
        "captions": {
            "2": "A little lion is sliding under the back of an adult lion.",
            "3": "A little lion is jumping off from the back of an adult lion.",
            "1": "A little lion is falling down from the back of an adult lion."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_199",
        "captions": {
            "2": "A person puts down a pineapple.",
            "3": "A person rolls a pineapple to the left.",
            "1": "A person picks up a pineapple."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_200",
        "captions": {
            "2": "A puppy is walking to the side the wigwam.",
            "3": "A puppy is walking into the wigwam.",
            "1": "A puppy is walking out of a wigwam."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_201",
        "captions": {
            "2": "Girl jumping onto the platform near the water.",
            "3": "Girl jumping out of the water.",
            "1": "Girl jumping into the water."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_202",
        "captions": {
            "2": "The camera is panning diagonally around the girl.",
            "3": "The camera is panning left to right around the girl.",
            "1": "The camera is panning up and down around the girl."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_203",
        "captions": {
            "2": "The pizza dough is rotated clockwise.",
            "3": "The pizza dough is being flipped up and down.",
            "1": "The pizza dough is rotated counter-clockwise."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    },
    {
        "video": "direction_204",
        "captions": {
            "2": "A camera flies upwards to the right of a stone building.",
            "3": "A camera flies horizontally to the right of a stone building.",
            "1": "A camera flies upwards past a stone building."
        },
        "dataset": "tempcompass",
        "aspect": "direction"
    }
]