{
  "meta_info": {
    "dataset": "mixed_tmp",
    "version": "v1",
    "split": "test"
  },
  "data_list": {
    "hallo3_cbeeccfef265386bdd3efe95081931c5": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/cbeeccfef265386bdd3efe95081931c5.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.08,
      "num_frames": 52,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "cbeeccfef265386bdd3efe95081931c5",
      "caption": "In the dimly lit, rustic cabin, a single male figure stands upright, dressed in a greenish-brown jacket and beige shirt, with a black strap visible over his shoulder, as he intently gazes off to the side with a focused expression. The wooden shelves and dim lighting create a cozy, lived-in atmosphere, with the character's dark hair and light skin tone standing out against the muted surroundings.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/cbeeccfef265386bdd3efe95081931c5.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_c14ed96df61f7b8db7b0d908c0a02513": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/c14ed96df61f7b8db7b0d908c0a02513.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.08,
      "num_frames": 52,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "c14ed96df61f7b8db7b0d908c0a02513",
      "caption": "A fair-skinned woman with loose, blonde waves stands alone in a dimly lit room, wearing a light green v-neck top, her expression contemplative as she paces near a clock on the wall.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/c14ed96df61f7b8db7b0d908c0a02513.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_738327e285f73162ec64bb8f07315fc6": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/738327e285f73162ec64bb8f07315fc6.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 3.52,
      "num_frames": 88,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "738327e285f73162ec64bb8f07315fc6",
      "caption": "The video shows a single man in his forties with a dark skin tone, dressed in a gray suit, white shirt, and dark tie, engaged in a conversation in a bright and professional office setting with several desks and computers. He stands with an attentive posture, his short, neatly trimmed hair and focused expression conveying a sense of seriousness.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/738327e285f73162ec64bb8f07315fc6.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_d495f86f528bbb7d1af5f9ef9e95e558": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/d495f86f528bbb7d1af5f9ef9e95e558.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.36,
      "num_frames": 59,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "d495f86f528bbb7d1af5f9ef9e95e558",
      "caption": "A man in a dark pinstripe suit, white shirt, and red dotted tie stands in a professional office, engaged in a serious conversation with another figure out of focus, while a window in the background provides a neutral backdrop. The man's focused expression and the formal office setting convey a sense of importance and decision-making.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/d495f86f528bbb7d1af5f9ef9e95e558.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_b9047fd306cbe59b58c2c9d4282e4664": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/b9047fd306cbe59b58c2c9d4282e4664.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 3.12,
      "num_frames": 78,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "b9047fd306cbe59b58c2c9d4282e4664",
      "caption": "a person is talking",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/b9047fd306cbe59b58c2c9d4282e4664.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_09d694042b75d8747b907f445844ad13": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/09d694042b75d8747b907f445844ad13.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 1.96,
      "num_frames": 49,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "09d694042b75d8747b907f445844ad13",
      "caption": "A young woman with fair skin and a blue shirt under a black blazer is seen engaging in a conversation in a warmly lit setting, wearing a gold necklace. She is then shown with a focused expression, her hair now dark and long, continuing the conversation in a room with soft lighting and a blurred background, with her attire remaining unchanged.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/09d694042b75d8747b907f445844ad13.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_7a93f260150acbafeb07708c49217fe2": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/7a93f260150acbafeb07708c49217fe2.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 3.52,
      "num_frames": 88,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "7a93f260150acbafeb07708c49217fe2",
      "caption": "The video shows a single woman, likely of Asian descent, with dark hair and an upright posture, wearing a black top, standing in a calm and contemplative indoor environment with a blueish hue, possibly under artificial lighting. The woman remains still, with no significant movements or actions captured, as the camera focuses on her serene presence.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/7a93f260150acbafeb07708c49217fe2.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_1af51191800f83e8211f9015ef09a848": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/1af51191800f83e8211f9015ef09a848.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 3.28,
      "num_frames": 82,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "1af51191800f83e8211f9015ef09a848",
      "caption": "A man with a light complexion and glasses is seated on a beige couch, wearing a purple jacket over a green shirt, in a cozy room with a bookshelf, a poster, and a table with a blue item. He smiles broadly, holding a blue-covered book or folder, surrounded by the warm and inviting atmosphere of his home.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/1af51191800f83e8211f9015ef09a848.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_5d9c87e88424490a64bedde92a2dc735": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/5d9c87e88424490a64bedde92a2dc735.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.12,
      "num_frames": 253,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "5d9c87e88424490a64bedde92a2dc735",
      "caption": "a person is talking",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/5d9c87e88424490a64bedde92a2dc735.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_4ca36f31d631b3b38799ff73ef986a28": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/4ca36f31d631b3b38799ff73ef986a28.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.24,
      "num_frames": 56,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "4ca36f31d631b3b38799ff73ef986a28",
      "caption": "The video shows a single middle-aged man, with a weathered complexion and graying beard, sitting in a dimly lit setting, wearing a plaid shirt, as he appears lost in thought. His somber expression and stillness are the only signs of life in a blurred, featureless background that draws the viewer's focus to his contemplative demeanor.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/4ca36f31d631b3b38799ff73ef986a28.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_eb0dc345451035ea604d1732c0224845": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/eb0dc345451035ea604d1732c0224845.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.36,
      "num_frames": 59,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "eb0dc345451035ea604d1732c0224845",
      "caption": "The video shows a single male figure, likely in his thirties or forties, with short, dark hair and a light skin tone, engaged in a mid-conversation with an unseen person, his expressive facial features illuminated by a warm glow in a dimly lit, nighttime setting of a possibly deserted parking lot or street.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/eb0dc345451035ea604d1732c0224845.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_71041e6c4031a19e006c631ced339ed1": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/71041e6c4031a19e006c631ced339ed1.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 6.56,
      "num_frames": 164,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "71041e6c4031a19e006c631ced339ed1",
      "caption": "A single individual, a security officer or similar personnel, stands in a break room or kitchen area, wearing a blue uniform with an American flag patch, sporting short dark hair and a beard, with a serious expression. The surroundings feature a well-stocked kitchen with shelves of food items and a microwave, set in a bright and neutral-toned workplace or institutional environment.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/71041e6c4031a19e006c631ced339ed1.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_a98d9f3012bc45d4d181dfb75ada1591": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/a98d9f3012bc45d4d181dfb75ada1591.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.56,
      "num_frames": 64,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "a98d9f3012bc45d4d181dfb75ada1591",
      "caption": "The video shows a single, well-dressed individual, likely a man with dark hair and a light skin tone, seated in a formal office setting, engaged in a conversation or listening intently with an upright posture. The individual is dressed in a suit with a patterned tie, surrounded by a wooden wall and a red leather chair, under subdued lighting that highlights the textures of the suit and chair.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/a98d9f3012bc45d4d181dfb75ada1591.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_34a1c758ca9807618a2761c430a0d363": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/34a1c758ca9807618a2761c430a0d363.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 5.84,
      "num_frames": 146,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "34a1c758ca9807618a2761c430a0d363",
      "caption": "The video shows a woman in her thirties or forties, with blonde hair and fair skin, engaged in a phone conversation while wearing a white blouse, necklace, and bracelet, in a dimly lit indoor setting with a soft glow from a window. She is seated, holding a white corded phone to her ear, with the title \"Consulting Producer\" visible, suggesting she is in a professional or production-related environment.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/34a1c758ca9807618a2761c430a0d363.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_7249e9a4364b6a899a15d96ac946b852": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/7249e9a4364b6a899a15d96ac946b852.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 7.32,
      "num_frames": 183,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "7249e9a4364b6a899a15d96ac946b852",
      "caption": "a person is talking",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/7249e9a4364b6a899a15d96ac946b852.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_b39f2335d792edf5c6aa98677e720d39": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/b39f2335d792edf5c6aa98677e720d39.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.6,
      "num_frames": 65,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "b39f2335d792edf5c6aa98677e720d39",
      "caption": "The video shows a single person, a light-skinned individual with short, bald hair, lying on a white couch in a relaxed state, wearing a light blue hospital gown. The person is situated in a calm, indoor environment with a bright background, likely a home, illuminated by natural light through a window with partially closed blinds.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/b39f2335d792edf5c6aa98677e720d39.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_ff925142ff26b6dc2cb6dc048a04ee86": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/ff925142ff26b6dc2cb6dc048a04ee86.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.0,
      "num_frames": 50,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "ff925142ff26b6dc2cb6dc048a04ee86",
      "caption": "A young adult male, likely in his twenties or thirties with light skin and dark hair, is seen standing in a casual conversation in a blurred outdoor setting, possibly a park, wearing a light-colored t-shirt with text on it. The relaxed atmosphere and natural light surrounding him suggest a peaceful and informal environment.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/ff925142ff26b6dc2cb6dc048a04ee86.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_22abbb237ffb222a15eb95409ae36785": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/22abbb237ffb222a15eb95409ae36785.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.84,
      "num_frames": 71,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "22abbb237ffb222a15eb95409ae36785",
      "caption": "A young woman with blonde hair and a concerned expression sits indoors, wearing a brown jacket with a white collar and a stain on her shoulder, engaged in a serious conversation or encounter. She is seated in a room with a window with blinds and a potted plant nearby, her posture and facial expression conveying a sense of intensity.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/22abbb237ffb222a15eb95409ae36785.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_39be7290fb6bc9b73e1f4eae3674c53f": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/39be7290fb6bc9b73e1f4eae3674c53f.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.08,
      "num_frames": 52,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "39be7290fb6bc9b73e1f4eae3674c53f",
      "caption": "A single person, a middle-aged woman with blonde hair, stands upright in a dimly lit room with reddish artificial lighting, wearing a brown leather jacket over a scarf, exuding a contemplative atmosphere as she stands with a slight tilt of her head.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/39be7290fb6bc9b73e1f4eae3674c53f.npy",
      "source_dataset": "hallo3"
    },
    "hallo3_c5b1aa90db4ce0102d302aa32a93c1fa": {
      "video_path": "hallo3/hallo3_training_data/videos_cropped_new/c5b1aa90db4ce0102d302aa32a93c1fa.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.2,
      "num_frames": 55,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "c5b1aa90db4ce0102d302aa32a93c1fa",
      "caption": "The video shows a single person, a woman in her thirties with long, wavy brown hair and fair skin, sitting in a dimly lit indoor setting with a window and blinds in the background, wearing a dark-colored top and appearing contemplative with a slightly turned posture and tilted head.",
      "sr": 16000,
      "keypoint_path": "hallo3/hallo3_training_data/keypoint308/c5b1aa90db4ce0102d302aa32a93c1fa.npy",
      "source_dataset": "hallo3"
    },
    "HDTF_WDA_BenCardin0_000_5500_5750": {
      "video_path": "HDTF/video_resampled_segment/WDA_BenCardin0_000_5500_5750.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 764,
      "width": 764,
      "language": "English",
      "emotion": null,
      "index": "WDA_BenCardin0_000_5500_5750",
      "caption": "an older man in a suit and tie, speaking directly to the camera against a backdrop that includes a wooden panel and part of a flag. He appears to be engaged in delivering a formal or professional message.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_BenCardin0_000_5500_5750.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_BenCardin1_000_3750_4000": {
      "video_path": "HDTF/video_resampled_segment/WDA_BenCardin1_000_3750_4000.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 738,
      "width": 738,
      "language": "English",
      "emotion": null,
      "index": "WDA_BenCardin1_000_3750_4000",
      "caption": "an individual dressed in a suit and tie, speaking directly to the camera with a backdrop that includes the U.S. Capitol building, suggesting a political or governmental context. The person appears to be addressing an audience, likely discussing important topics related to governance or policy.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_BenCardin1_000_3750_4000.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_JoeManchin_000_2750_3000": {
      "video_path": "HDTF/video_resampled_segment/WDA_JoeManchin_000_2750_3000.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 830,
      "width": 830,
      "language": "English",
      "emotion": null,
      "index": "WDA_JoeManchin_000_2750_3000",
      "caption": "a man in a suit and tie, speaking directly to the camera against a backdrop that includes a blue sky and what appears to be part of a building. He seems to be delivering a formal or professional message.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_JoeManchin_000_2750_3000.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WRA_JebHensarling0_000_1000_1250": {
      "video_path": "HDTF/video_resampled_segment/WRA_JebHensarling0_000_1000_1250.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 668,
      "width": 668,
      "language": "English",
      "emotion": null,
      "index": "WRA_JebHensarling0_000_1000_1250",
      "caption": "a man in a suit and tie speaking directly to the camera, likely in an indoor setting with statues and architectural details in the background, suggesting a formal or governmental environment.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WRA_JebHensarling0_000_1000_1250.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_FrankPallone0_000_2500_2750": {
      "video_path": "HDTF/video_resampled_segment/WDA_FrankPallone0_000_2500_2750.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 770,
      "width": 770,
      "language": "English",
      "emotion": null,
      "index": "WDA_FrankPallone0_000_2500_2750",
      "caption": "a man speaking in an indoor setting, likely an office or study, with bookshelves filled with books in the background. He appears to be engaged in a serious discussion or interview, as suggested by his focused expression and formal attire.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_FrankPallone0_000_2500_2750.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_RD_Radio8_000_250_500": {
      "video_path": "HDTF/video_resampled_segment/RD_Radio8_000_250_500.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 446,
      "width": 446,
      "language": "English",
      "emotion": null,
      "index": "RD_Radio8_000_250_500",
      "caption": "a woman with long blonde hair speaking directly to the camera, set against an ornate, grand interior background with intricate architectural details and chandeliers. She appears to be delivering a speech or presentation in a formal setting.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/RD_Radio8_000_250_500.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_RD_Radio37_000_500_750": {
      "video_path": "HDTF/video_resampled_segment/RD_Radio37_000_500_750.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 554,
      "width": 554,
      "language": "English",
      "emotion": null,
      "index": "RD_Radio37_000_500_750",
      "caption": "a woman with curly gray hair, wearing a dark blazer and a necklace, speaking directly to the camera in an indoor setting adorned with red curtains and decorative elements.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/RD_Radio37_000_500_750.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_DebbieStabenow1_000_3750_4000": {
      "video_path": "HDTF/video_resampled_segment/WDA_DebbieStabenow1_000_3750_4000.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 566,
      "width": 566,
      "language": "English",
      "emotion": null,
      "index": "WDA_DebbieStabenow1_000_3750_4000",
      "caption": "a woman with blonde hair, wearing a blue blazer and a beaded necklace, speaking directly to the camera against a backdrop of a window with a blurred view of buildings. She appears to be engaged in an interview or discussion, as she is looking slightly off-camera while speaking.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_DebbieStabenow1_000_3750_4000.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_RD_Radio17_000_1500_1750": {
      "video_path": "HDTF/video_resampled_segment/RD_Radio17_000_1500_1750.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 562,
      "width": 562,
      "language": "English",
      "emotion": null,
      "index": "RD_Radio17_000_1500_1750",
      "caption": "a woman speaking outdoors, with a white building and greenery in the background. She appears to be addressing an audience or camera directly, possibly delivering a speech or interview.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/RD_Radio17_000_1500_1750.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_BernieSanders_000_7000_7250": {
      "video_path": "HDTF/video_resampled_segment/WDA_BernieSanders_000_7000_7250.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 974,
      "width": 974,
      "language": "English",
      "emotion": null,
      "index": "WDA_BernieSanders_000_7000_7250",
      "caption": "a man in a suit and tie speaking directly to the camera, with an American flag visible in the background. The setting appears formal, suggesting a political or professional context.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_BernieSanders_000_7000_7250.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_JackyRosen1_000_7250_7500": {
      "video_path": "HDTF/video_resampled_segment/WDA_JackyRosen1_000_7250_7500.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 674,
      "width": 674,
      "language": "English",
      "emotion": null,
      "index": "WDA_JackyRosen1_000_7250_7500",
      "caption": "a woman speaking directly to the camera, likely in an interview or news segment setting, as she addresses the audience with a serious and composed demeanor.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_JackyRosen1_000_7250_7500.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_ChrisVanHollen0_000_4250_4500": {
      "video_path": "HDTF/video_resampled_segment/WDA_ChrisVanHollen0_000_4250_4500.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 718,
      "width": 718,
      "language": "English",
      "emotion": null,
      "index": "WDA_ChrisVanHollen0_000_4250_4500",
      "caption": "a man in a suit and tie, seated against a wooden backdrop, speaking directly to the camera with a serious expression. He appears to be engaged in an interview or discussion, maintaining eye contact and using hand gestures to emphasize his points.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_ChrisVanHollen0_000_4250_4500.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WRA_VickyHartzler_000_2750_3000": {
      "video_path": "HDTF/video_resampled_segment/WRA_VickyHartzler_000_2750_3000.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 676,
      "width": 676,
      "language": "English",
      "emotion": null,
      "index": "WRA_VickyHartzler_000_2750_3000",
      "caption": "a woman with short brown hair, wearing a blue top, speaking directly to the camera against a backdrop of a stone wall. She appears to be engaged in a conversation or delivering a message, as her facial expressions and mouth movements suggest she is actively speaking.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WRA_VickyHartzler_000_2750_3000.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_AmyKlobuchar1_000_2250_2500": {
      "video_path": "HDTF/video_resampled_segment/WDA_AmyKlobuchar1_000_2250_2500.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 1004,
      "width": 1004,
      "language": "English",
      "emotion": null,
      "index": "WDA_AmyKlobuchar1_000_2250_2500",
      "caption": "a woman speaking directly to the camera, likely delivering an interview or presentation in a professional setting with a bookshelf and screen in the background.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_AmyKlobuchar1_000_2250_2500.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_ChrisMurphy1_000_3000_3250": {
      "video_path": "HDTF/video_resampled_segment/WDA_ChrisMurphy1_000_3000_3250.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 868,
      "width": 868,
      "language": "English",
      "emotion": null,
      "index": "WDA_ChrisMurphy1_000_3000_3250",
      "caption": "a man in a suit and tie speaking directly to the camera, likely delivering a news report or an interview segment. The background consists of horizontal wooden slats, suggesting a studio setting.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_ChrisMurphy1_000_3000_3250.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_MarkWarner1_001_750_1000": {
      "video_path": "HDTF/video_resampled_segment/WDA_MarkWarner1_001_750_1000.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 742,
      "width": 742,
      "language": "English",
      "emotion": null,
      "index": "WDA_MarkWarner1_001_750_1000",
      "caption": "a man in a suit and tie speaking directly to the camera, likely in an interview or news segment setting, with a wooden panel background.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_MarkWarner1_001_750_1000.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_JohnLewis1_000_6000_6250": {
      "video_path": "HDTF/video_resampled_segment/WDA_JohnLewis1_000_6000_6250.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 520,
      "width": 520,
      "language": "English",
      "emotion": null,
      "index": "WDA_JohnLewis1_000_6000_6250",
      "caption": "a man in a suit and tie speaking directly to the camera, likely delivering an interview or making a statement. The background includes framed pictures, suggesting a professional or formal setting.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_JohnLewis1_000_6000_6250.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WRA_JohnBoehner0_000_1500_1626": {
      "video_path": "HDTF/video_resampled_segment/WRA_JohnBoehner0_000_1500_1626.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 5.04,
      "num_frames": 126,
      "height": 772,
      "width": 772,
      "language": "English",
      "emotion": null,
      "index": "WRA_JohnBoehner0_000_1500_1626",
      "caption": "a man in a suit and tie speaking to the camera, with an American flag visible in the background. The man appears to be addressing an audience or giving a speech, as he is looking directly at the camera and speaking with a serious expression.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WRA_JohnBoehner0_000_1500_1626.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_DougJones_000_3000_3250": {
      "video_path": "HDTF/video_resampled_segment/WDA_DougJones_000_3000_3250.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 702,
      "width": 702,
      "language": "English",
      "emotion": null,
      "index": "WDA_DougJones_000_3000_3250",
      "caption": "a man in a suit and tie, seated indoors with a blurred background that includes a building resembling a government or institutional structure. He appears to be speaking, likely participating in an interview or formal discussion.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_DougJones_000_3000_3250.npy",
      "source_dataset": "HDTF"
    },
    "HDTF_WDA_LaurenUnderwood_000_6500_6750": {
      "video_path": "HDTF/video_resampled_segment/WDA_LaurenUnderwood_000_6500_6750.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 10.0,
      "num_frames": 250,
      "height": 942,
      "width": 942,
      "language": "English",
      "emotion": null,
      "index": "WDA_LaurenUnderwood_000_6500_6750",
      "caption": "a person speaking directly to the camera, wearing glasses and a white top, against a plain background. The individual appears to be engaged in a conversation or presentation, as indicated by their expressive hand gestures and facial expressions.",
      "sr": 16000,
      "keypoint_path": "HDTF/keypoint308/WDA_LaurenUnderwood_000_6500_6750.npy",
      "source_dataset": "HDTF"
    },
    "vfhq_Clip+zz1qAQ6KjWU+P0+C2+F23220-23598": {
      "video_path": "vfhq/video_resampled/Clip+zz1qAQ6KjWU+P0+C2+F23220-23598.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 15.08,
      "num_frames": 377,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zz1qAQ6KjWU+P0+C2+F23220-23598",
      "caption": "a man with curly hair and a beard, wearing a blue shirt, sitting on a leopard-print couch. He appears to be engaged in a conversation or interview, as he is looking slightly to his left with a thoughtful expression.",
      "sr": null,
      "keypoint_path": "vfhq/keypoint308/Clip+zz1qAQ6KjWU+P0+C2+F23220-23598.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zcIsBtKySWY+P0+C0+F4030-4345": {
      "video_path": "vfhq/video_resampled/Clip+zcIsBtKySWY+P0+C0+F4030-4345.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 12.56,
      "num_frames": 314,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zcIsBtKySWY+P0+C0+F4030-4345",
      "caption": "a person with pink hair and bold makeup, wearing a black top and a gold necklace, against a teal background. The individual is speaking and appears to be engaged in a conversation or presentation.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zcIsBtKySWY+P0+C0+F4030-4345.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zhCoO7i0aOc+P0+C2+F1274-1404": {
      "video_path": "vfhq/video_resampled/Clip+zhCoO7i0aOc+P0+C2+F1274-1404.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 5.36,
      "num_frames": 134,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zhCoO7i0aOc+P0+C2+F1274-1404",
      "caption": "a woman with curly blonde hair speaking into a microphone, likely at an event or interview setting, as indicated by the backdrop with logos and text.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zhCoO7i0aOc+P0+C2+F1274-1404.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zy-RE76XDgM+P0+C1+F92975-93111": {
      "video_path": "vfhq/video_resampled/Clip+zy-RE76XDgM+P0+C1+F92975-93111.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 4.56,
      "num_frames": 114,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zy-RE76XDgM+P0+C1+F92975-93111",
      "caption": "a man in a suit, seated and engaged in conversation, with a dark background that keeps the focus on him.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zy-RE76XDgM+P0+C1+F92975-93111.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zkXz_3qv9us+P0+C0+F12194-12558": {
      "video_path": "vfhq/video_resampled/Clip+zkXz_3qv9us+P0+C0+F12194-12558.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 15.12,
      "num_frames": 378,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zkXz_3qv9us+P0+C0+F12194-12558",
      "caption": "a woman with short blonde hair, wearing a black top, speaking directly to the camera against a plain white background. She appears to be discussing a topic of interest, engaging the viewer with her expressive facial expressions and direct gaze.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zkXz_3qv9us+P0+C0+F12194-12558.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zdmXiN7102g+P0+C2+F3802-3937": {
      "video_path": "vfhq/video_resampled/Clip+zdmXiN7102g+P0+C2+F3802-3937.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 5.36,
      "num_frames": 134,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zdmXiN7102g+P0+C2+F3802-3937",
      "caption": "a person speaking directly to the camera, set against a backdrop of neatly arranged books on shelves, suggesting an academic or intellectual setting. The individual appears to be engaged in delivering a message or explanation, possibly related to a topic of study or personal interest.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zdmXiN7102g+P0+C2+F3802-3937.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zp0GOlRwu_8+P0+C0+F3523-3680": {
      "video_path": "vfhq/video_resampled/Clip+zp0GOlRwu_8+P0+C0+F3523-3680.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 5.28,
      "num_frames": 132,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zp0GOlRwu_8+P0+C0+F3523-3680",
      "caption": "a close-up shot of a man in a suit, who appears to be speaking or reacting to something off-camera with a slight smile and engaged expression. The background is dimly lit, suggesting an indoor setting, possibly a studio or stage environment.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zp0GOlRwu_8+P0+C0+F3523-3680.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zzdr0uOCaOQ+P0+C1+F1743-2098": {
      "video_path": "vfhq/video_resampled/Clip+zzdr0uOCaOQ+P0+C1+F1743-2098.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 11.88,
      "num_frames": 297,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zzdr0uOCaOQ+P0+C1+F1743-2098",
      "caption": "a woman with long, wavy hair styled in a high ponytail, wearing large hoop earrings and a sleeveless top. She is speaking directly to the camera against a solid purple background.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zzdr0uOCaOQ+P0+C1+F1743-2098.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zhCoO7i0aOc+P0+C1+F672-894": {
      "video_path": "vfhq/video_resampled/Clip+zhCoO7i0aOc+P0+C1+F672-894.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 9.2,
      "num_frames": 230,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zhCoO7i0aOc+P0+C1+F672-894",
      "caption": "a woman with curly blonde hair speaking into a microphone, likely at an event or press conference, as indicated by the backdrop with logos such as PayPal and others.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zhCoO7i0aOc+P0+C1+F672-894.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zr-J6NgDqiQ+P1+C0+F134348-134512": {
      "video_path": "vfhq/video_resampled/Clip+zr-J6NgDqiQ+P1+C0+F134348-134512.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 5.52,
      "num_frames": 138,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zr-J6NgDqiQ+P1+C0+F134348-134512",
      "caption": "a close-up shot of a person singing into a microphone, with a purple background that adds a dramatic effect to the performance. The individual appears to be deeply engaged in their vocal delivery, suggesting a passionate or emotional performance.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zr-J6NgDqiQ+P1+C0+F134348-134512.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zZbzlKhaRok+P0+C2+F2249-2425": {
      "video_path": "vfhq/video_resampled/Clip+zZbzlKhaRok+P0+C2+F2249-2425.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 7.0,
      "num_frames": 175,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zZbzlKhaRok+P0+C2+F2249-2425",
      "caption": "a man in a suit and tie, speaking with a serious expression against a backdrop of golden, abstract patterns. The setting suggests a formal or professional environment, possibly an interview or a speech.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zZbzlKhaRok+P0+C2+F2249-2425.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+ze1TZ1BJeCE+P0+C1+F19807-20208": {
      "video_path": "vfhq/video_resampled/Clip+ze1TZ1BJeCE+P0+C1+F19807-20208.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 6.72,
      "num_frames": 168,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+ze1TZ1BJeCE+P0+C1+F19807-20208",
      "caption": "a woman speaking directly to the camera, likely sharing information or engaging with an audience, as she gestures with her hand while talking. The background appears to be a well-lit room with a mirror and some decor visible.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+ze1TZ1BJeCE+P0+C1+F19807-20208.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+za5IQKbdwbs+P0+C0+F28001-28255": {
      "video_path": "vfhq/video_resampled/Clip+za5IQKbdwbs+P0+C0+F28001-28255.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 8.52,
      "num_frames": 213,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+za5IQKbdwbs+P0+C0+F28001-28255",
      "caption": "a man speaking directly to the camera, wearing a black jacket over a white shirt, with a neutral background that keeps the focus on him.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+za5IQKbdwbs+P0+C0+F28001-28255.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+ze1TZ1BJeCE+P0+C2+F23385-23527": {
      "video_path": "vfhq/video_resampled/Clip+ze1TZ1BJeCE+P0+C2+F23385-23527.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 2.4,
      "num_frames": 60,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+ze1TZ1BJeCE+P0+C2+F23385-23527",
      "caption": "a person applying makeup, specifically using a sponge to blend or apply product on their face. The setting appears to be a well-lit bathroom with a mirror and vanity lights visible in the background.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+ze1TZ1BJeCE+P0+C2+F23385-23527.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zy-RE76XDgM+P0+C2+F3807-3939": {
      "video_path": "vfhq/video_resampled/Clip+zy-RE76XDgM+P0+C2+F3807-3939.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 4.44,
      "num_frames": 111,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zy-RE76XDgM+P0+C2+F3807-3939",
      "caption": "a man in a suit, seated and speaking, likely engaged in an interview or discussion. The setting appears to be a professional environment with a dark background, emphasizing the speaker's attire and demeanor.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zy-RE76XDgM+P0+C2+F3807-3939.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zxw_0oMPChQ+P1+C1+F8213-8440": {
      "video_path": "vfhq/video_resampled/Clip+zxw_0oMPChQ+P1+C1+F8213-8440.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 7.6,
      "num_frames": 190,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zxw_0oMPChQ+P1+C1+F8213-8440",
      "caption": "a man with gray hair and a beard, wearing a gray blazer over a dark shirt, speaking in what appears to be an interview or discussion setting. The background is blurred, focusing attention on the speaker.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zxw_0oMPChQ+P1+C1+F8213-8440.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+ze1TZ1BJeCE+P0+C0+F20213-20494": {
      "video_path": "vfhq/video_resampled/Clip+ze1TZ1BJeCE+P0+C0+F20213-20494.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 4.72,
      "num_frames": 118,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+ze1TZ1BJeCE+P0+C0+F20213-20494",
      "caption": "a person demonstrating a skincare routine, applying a product to their face with their fingers. The setting appears to be a well-lit bathroom with a mirror and vanity in the background.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+ze1TZ1BJeCE+P0+C0+F20213-20494.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+z_VWgLm0oHw+P0+C0+F2226-2335": {
      "video_path": "vfhq/video_resampled/Clip+z_VWgLm0oHw+P0+C0+F2226-2335.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 4.32,
      "num_frames": 108,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+z_VWgLm0oHw+P0+C0+F2226-2335",
      "caption": "a person with shoulder-length brown hair, wearing a necklace, holding their hand near their face, possibly in a thoughtful or contemplative pose. The lighting is dim, creating a moody atmosphere.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+z_VWgLm0oHw+P0+C0+F2226-2335.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zqSJy6IyFXk+P0+C1+F4924-5039": {
      "video_path": "vfhq/video_resampled/Clip+zqSJy6IyFXk+P0+C1+F4924-5039.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 4.76,
      "num_frames": 119,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zqSJy6IyFXk+P0+C1+F4924-5039",
      "caption": "a man with short, dark hair and a neatly groomed appearance, wearing a dark-colored shirt. He is speaking directly to the camera against a plain, dark background, suggesting an interview or discussion setting.",
      "sr": null,
      "keypoint_path": "vfhq/keypoint308/Clip+zqSJy6IyFXk+P0+C1+F4924-5039.npy",
      "source_dataset": "vfhq"
    },
    "vfhq_Clip+zhCoO7i0aOc+P0+C0+F926-1263": {
      "video_path": "vfhq/video_resampled/Clip+zhCoO7i0aOc+P0+C0+F926-1263.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 14.0,
      "num_frames": 350,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": null,
      "index": "Clip+zhCoO7i0aOc+P0+C0+F926-1263",
      "caption": "a woman with blonde hair and large hoop earrings speaking into a microphone at an event, with a blue background that includes logos for PayPal and other sponsors.",
      "sr": 16000,
      "keypoint_path": "vfhq/keypoint308/Clip+zhCoO7i0aOc+P0+C0+F926-1263.npy",
      "source_dataset": "vfhq"
    },
    "celebv-hq_zogIrkxnsfw_30": {
      "video_path": "celebv-hq/videos_resampled/resampled_zogIrkxnsfw_30.mp4",
      "audio_path": "celebv-hq/audio/resampled_zogIrkxnsfw_30.wav",
      "fps": 25.0,
      "duration": 3.32,
      "num_frames": 83,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        0,
        0
      ],
      "index": "zogIrkxnsfw_30",
      "caption": "an individual discussing their vision for societal change, with a focus on how they would like to see society evolve in the future. The setting appears to be an outdoor urban area, possibly near a store or public space.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zogIrkxnsfw_30.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zogIrkxnsfw_0": {
      "video_path": "celebv-hq/videos_resampled/resampled_zogIrkxnsfw_0.mp4",
      "audio_path": "celebv-hq/audio/resampled_zogIrkxnsfw_0.wav",
      "fps": 25.0,
      "duration": 3.76,
      "num_frames": 94,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        1,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        1,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zogIrkxnsfw_0",
      "caption": "a man with dreadlocks speaking directly to the camera, set against a backdrop of playground equipment and a clear sky, suggesting an outdoor setting. The text \"EASY ENG\" is visible in the top right corner, indicating the video might be part of a series or channel focused on engaging content.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zogIrkxnsfw_0.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zogIrkxnsfw_23": {
      "video_path": "celebv-hq/videos_resampled/resampled_zogIrkxnsfw_23.mp4",
      "audio_path": "celebv-hq/audio/resampled_zogIrkxnsfw_23.wav",
      "fps": 25.0,
      "duration": 19.36,
      "num_frames": 484,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        1,
        0,
        1,
        0,
        0
      ],
      "appearance": [
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zogIrkxnsfw_23",
      "caption": "a young man standing outdoors, speaking directly to the camera with a backdrop of greenery and trees. He appears to be giving an interview or sharing information, as he maintains eye contact with the viewer throughout his speech.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zogIrkxnsfw_23.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zqedeJzi4gc_3": {
      "video_path": "celebv-hq/videos_resampled/resampled_zqedeJzi4gc_3.mp4",
      "audio_path": "celebv-hq/audio/resampled_zqedeJzi4gc_3.wav",
      "fps": 25.0,
      "duration": 3.72,
      "num_frames": 93,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "sadness"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0
      ],
      "index": "zqedeJzi4gc_3",
      "caption": "a close-up shot of a person with blonde hair, set against a softly lit background that suggests an indoor setting. The lighting creates a warm and intimate atmosphere, focusing on the individual's hair and part of their face.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zqedeJzi4gc_3.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zoLk-qxLnZc_8_0": {
      "video_path": "celebv-hq/videos_resampled/resampled_zoLk-qxLnZc_8_0.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 3.4,
      "num_frames": 85,
      "height": 838,
      "width": 838,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        1,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        1,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zoLk-qxLnZc_8_0",
      "caption": "a person wearing a blue shirt, with their hair falling over one shoulder, set against an outdoor background with greenery visible. The individual appears to be engaged in a conversation or reacting to something, as suggested by their facial expression and body language.",
      "sr": null,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zoLk-qxLnZc_8_0.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zpU2uUohAyo_0": {
      "video_path": "celebv-hq/videos_resampled/resampled_zpU2uUohAyo_0.mp4",
      "audio_path": "celebv-hq/audio/resampled_zpU2uUohAyo_0.wav",
      "fps": 25.0,
      "duration": 6.16,
      "num_frames": 154,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zpU2uUohAyo_0",
      "caption": "a man speaking directly to the camera against a black background, likely delivering an informative or explanatory message.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zpU2uUohAyo_0.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zogIrkxnsfw_27": {
      "video_path": "celebv-hq/videos_resampled/resampled_zogIrkxnsfw_27.mp4",
      "audio_path": "celebv-hq/audio/resampled_zogIrkxnsfw_27.wav",
      "fps": 25.0,
      "duration": 20.04,
      "num_frames": 501,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "happy"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        0,
        0,
        1,
        0,
        0,
        1,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        1,
        1,
        0,
        0
      ],
      "index": "zogIrkxnsfw_27",
      "caption": "a woman with braided hair and glasses speaking into a microphone, gesturing as she talks, likely giving an interview or making a statement in front of a blue building.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zogIrkxnsfw_27.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zoSzqHlvN6s_0": {
      "video_path": "celebv-hq/videos_resampled/resampled_zoSzqHlvN6s_0.mp4",
      "audio_path": "celebv-hq/audio/resampled_zoSzqHlvN6s_0.wav",
      "fps": 25.0,
      "duration": 3.64,
      "num_frames": 91,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        1,
        0,
        0,
        0
      ],
      "index": "zoSzqHlvN6s_0",
      "caption": "a woman with a surprised or shocked expression, her mouth open and eyes wide, as she gestures with her hand, possibly reacting to something unexpected. The background is blurred but appears to be an indoor setting with warm lighting.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zoSzqHlvN6s_0.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zvGtGNARfqA_18_0": {
      "video_path": "celebv-hq/videos_resampled/resampled_zvGtGNARfqA_18_0.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 4.76,
      "num_frames": 119,
      "height": 1166,
      "width": 1166,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "happy"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        1,
        0,
        1,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zvGtGNARfqA_18_0",
      "caption": "a person with long brown hair, wearing a red top, standing outdoors with a blurred background that suggests a garden or park setting. The individual appears to be speaking or presenting, as indicated by their open mouth and engaged expression.",
      "sr": null,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zvGtGNARfqA_18_0.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zqdC-PJt-nE_0": {
      "video_path": "celebv-hq/videos_resampled/resampled_zqdC-PJt-nE_0.mp4",
      "audio_path": "celebv-hq/audio/resampled_zqdC-PJt-nE_0.wav",
      "fps": 25.0,
      "duration": 19.96,
      "num_frames": 499,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "anger"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        1,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zqdC-PJt-nE_0",
      "caption": "a person speaking directly to the camera against a black background, with their upper body and face visible. The lighting highlights their facial expressions as they communicate.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zqdC-PJt-nE_0.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_znZDbxVmFbM_0": {
      "video_path": "celebv-hq/videos_resampled/resampled_znZDbxVmFbM_0.mp4",
      "audio_path": "celebv-hq/audio/resampled_znZDbxVmFbM_0.wav",
      "fps": 25.0,
      "duration": 4.96,
      "num_frames": 124,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        1,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "znZDbxVmFbM_0",
      "caption": "a person walking outdoors, with snowflakes visible in the air, suggesting it is snowing. The background appears to be an urban setting with buildings partially obscured by the falling snow.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_znZDbxVmFbM_0.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zwrE99cctuw_24": {
      "video_path": "celebv-hq/videos_resampled/resampled_zwrE99cctuw_24.mp4",
      "audio_path": "celebv-hq/audio/resampled_zwrE99cctuw_24.wav",
      "fps": 25.0,
      "duration": 4.4,
      "num_frames": 110,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "happy"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0
      ],
      "index": "zwrE99cctuw_24",
      "caption": "a woman with short hair smiling and speaking, likely engaging in an interview or conversation in a public setting, as other people can be seen walking in the background.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zwrE99cctuw_24.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zqTxw0eiZaE_1": {
      "video_path": "celebv-hq/videos_resampled/resampled_zqTxw0eiZaE_1.mp4",
      "audio_path": "celebv-hq/audio/resampled_zqTxw0eiZaE_1.wav",
      "fps": 25.0,
      "duration": 8.24,
      "num_frames": 206,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": true,
        "labels": [
          {
            "emotion": "happy",
            "start_sec": 0.9699999999999989,
            "end_sec": 2.969999999999999
          },
          {
            "emotion": "neutral",
            "start_sec": 3.969999999999999,
            "end_sec": 7.969999999999999
          }
        ]
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0
      ],
      "index": "zqTxw0eiZaE_1",
      "caption": "an elderly man with white hair, dressed in a suit and tie, smiling warmly while looking to his left. The background shows lush greenery, suggesting an outdoor setting.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zqTxw0eiZaE_1.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zwrE99cctuw_9": {
      "video_path": "celebv-hq/videos_resampled/resampled_zwrE99cctuw_9.mp4",
      "audio_path": "celebv-hq/audio/resampled_zwrE99cctuw_9.wav",
      "fps": 25.0,
      "duration": 2.92,
      "num_frames": 73,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        1,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zwrE99cctuw_9",
      "caption": "a person speaking directly to the camera, likely providing commentary or information, with a background that includes storefronts and signage, suggesting an urban setting.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zwrE99cctuw_9.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zzQ8ZvPr-xk_44_0": {
      "video_path": "celebv-hq/videos_resampled/resampled_zzQ8ZvPr-xk_44_0.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 3.24,
      "num_frames": 81,
      "height": 1408,
      "width": 1408,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zzQ8ZvPr-xk_44_0",
      "caption": "a person speaking directly to the camera, gesturing with their hands as they explain something, and then transitions to a close-up shot where the individual continues to speak while making expressive hand movements.",
      "sr": null,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zzQ8ZvPr-xk_44_0.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zogIrkxnsfw_1": {
      "video_path": "celebv-hq/videos_resampled/resampled_zogIrkxnsfw_1.mp4",
      "audio_path": "celebv-hq/audio/resampled_zogIrkxnsfw_1.wav",
      "fps": 25.0,
      "duration": 5.88,
      "num_frames": 147,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        1,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zogIrkxnsfw_1",
      "caption": "an individual with dreadlocks speaking into a microphone, likely being interviewed or giving a statement, set against a backdrop that includes playground equipment and a clear sky.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zogIrkxnsfw_1.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zzUYkxN11P4_0": {
      "video_path": "celebv-hq/videos_resampled/resampled_zzUYkxN11P4_0.mp4",
      "audio_path": "celebv-hq/audio/resampled_zzUYkxN11P4_0.wav",
      "fps": 25.0,
      "duration": 3.88,
      "num_frames": 97,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0
      ],
      "index": "zzUYkxN11P4_0",
      "caption": "a close-up shot of a woman with long, wavy hair, set against an outdoor background that suggests a natural or rural setting. Her expression appears contemplative or concerned as she looks off to the side.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zzUYkxN11P4_0.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zzORtbUYE4c_2": {
      "video_path": "celebv-hq/videos_resampled/resampled_zzORtbUYE4c_2.mp4",
      "audio_path": "celebv-hq/audio/resampled_zzORtbUYE4c_2.wav",
      "fps": 25.0,
      "duration": 3.36,
      "num_frames": 84,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "happy"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        1,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zzORtbUYE4c_2",
      "caption": "a close-up shot of a person with long, wavy hair, smiling warmly and looking directly at the camera. The background appears to be an indoor setting with wooden paneling.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zzORtbUYE4c_2.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zqedeJzi4gc_7": {
      "video_path": "celebv-hq/videos_resampled/resampled_zqedeJzi4gc_7.mp4",
      "audio_path": "celebv-hq/audio/resampled_zqedeJzi4gc_7.wav",
      "fps": 25.0,
      "duration": 3.88,
      "num_frames": 97,
      "height": 512,
      "width": 512,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "sadness"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        1,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zqedeJzi4gc_7",
      "caption": "a close-up shot of a person with long dark hair, set against a dark background illuminated by a red light, creating a dramatic and intense atmosphere.",
      "sr": 16000,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zqedeJzi4gc_7.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-hq_zsImYVwuxrs_19_1": {
      "video_path": "celebv-hq/videos_resampled/resampled_zsImYVwuxrs_19_1.mp4",
      "audio_path": null,
      "fps": 25.0,
      "duration": 4.2,
      "num_frames": 105,
      "height": 1220,
      "width": 1220,
      "language": "English",
      "emotion": {
        "sep_flag": false,
        "labels": "neutral"
      },
      "action": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0
      ],
      "appearance": [
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "index": "zsImYVwuxrs_19_1",
      "caption": "a shirtless man with short hair, resting his head on one hand and looking directly at the camera with a thoughtful expression.",
      "sr": null,
      "keypoint_path": "celebv-hq/keypoint308/resampled_zsImYVwuxrs_19_1.npy",
      "source_dataset": "celebv-hq"
    },
    "celebv-text_7QF_-HVWtNY_1_0": {
      "video_path": "celebv-text/video_resampled/7QF_-HVWtNY_1_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/7QF_-HVWtNY_1_0.m4a",
      "fps": 24.0,
      "duration": 17.083333333333332,
      "num_frames": 410,
      "height": 1164,
      "width": 1270,
      "language": "English",
      "emotion": null,
      "index": "7QF_-HVWtNY_1_0",
      "caption": "a man with glasses and a beard speaking directly to the camera, likely discussing a topic of interest or sharing information. The background includes a bookshelf filled with books and a guitar hanging on the wall, suggesting a personal or home office setting.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/7QF_-HVWtNY_1_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_DeGChiRDY9A_25_3": {
      "video_path": "celebv-text/video_resampled/DeGChiRDY9A_25_3.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/DeGChiRDY9A_25_3.m4a",
      "fps": 29.97,
      "duration": 13.680347013680347,
      "num_frames": 410,
      "height": 1370,
      "width": 1496,
      "language": "English",
      "emotion": null,
      "index": "DeGChiRDY9A_25_3",
      "caption": "a person speaking directly to the camera, likely sharing personal stories or experiences, with a cozy kitchen setting in the background. The individual appears to be engaging with the audience, possibly discussing topics related to lifestyle, personal growth, or daily life.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/DeGChiRDY9A_25_3.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_POZ5ZHEQK7s_23_4": {
      "video_path": "celebv-text/video_resampled/POZ5ZHEQK7s_23_4.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/POZ5ZHEQK7s_23_4.m4a",
      "fps": 30.0,
      "duration": 10.0,
      "num_frames": 300,
      "height": 1134,
      "width": 1144,
      "language": "English",
      "emotion": null,
      "index": "POZ5ZHEQK7s_23_4",
      "caption": "a person wearing a white cap and a red tank top, sitting in front of a shelf with decorative items such as a pineapple and a sign that reads \"LUKE.\" The individual appears to be speaking or reacting to something, possibly engaging in a casual conversation or vlog-style recording.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/POZ5ZHEQK7s_23_4.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_k13lh4ibfks_8_1": {
      "video_path": "celebv-text/video_resampled/k13lh4ibfks_8_1.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/k13lh4ibfks_8_1.m4a",
      "fps": 29.97,
      "duration": 5.372038705372039,
      "num_frames": 161,
      "height": 1100,
      "width": 1154,
      "language": "English",
      "emotion": null,
      "index": "k13lh4ibfks_8_1",
      "caption": "a person sitting inside a car, making playful hand gestures and expressions towards the camera, creating a lighthearted and engaging atmosphere.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/k13lh4ibfks_8_1.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_hO3KkfcDPRI_8_0": {
      "video_path": "celebv-text/video_resampled/hO3KkfcDPRI_8_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/hO3KkfcDPRI_8_0.m4a",
      "fps": 25.0,
      "duration": 13.0,
      "num_frames": 325,
      "height": 860,
      "width": 866,
      "language": "English",
      "emotion": null,
      "index": "hO3KkfcDPRI_8_0",
      "caption": "a person wearing a red outfit, holding a microphone and singing on stage under bright lighting. The background is dark, emphasizing the performer's presence.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/hO3KkfcDPRI_8_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_VGBxiZsnXh4_14_0": {
      "video_path": "celebv-text/video_resampled/VGBxiZsnXh4_14_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/VGBxiZsnXh4_14_0.m4a",
      "fps": 23.976,
      "duration": 7.21554888221555,
      "num_frames": 173,
      "height": 1032,
      "width": 1126,
      "language": "English",
      "emotion": null,
      "index": "VGBxiZsnXh4_14_0",
      "caption": "a person singing into a microphone, with their eyes closed and a focused expression, suggesting they are deeply engaged in their performance. The background is plain and light-colored, keeping the focus on the singer's actions.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/VGBxiZsnXh4_14_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_MstZPG_FDns_12_0": {
      "video_path": "celebv-text/video_resampled/MstZPG_FDns_12_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/MstZPG_FDns_12_0.m4a",
      "fps": 30.0,
      "duration": 12.7,
      "num_frames": 381,
      "height": 524,
      "width": 572,
      "language": "English",
      "emotion": null,
      "index": "MstZPG_FDns_12_0",
      "caption": "a person wearing a black hoodie, speaking directly to the camera against a backdrop of shelves displaying various items, including what appears to be a gaming headset and a plush toy. The setting is illuminated with blue lighting, creating a modern and tech-oriented atmosphere.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/MstZPG_FDns_12_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_A4MOnfWx72M_5_0": {
      "video_path": "celebv-text/video_resampled/A4MOnfWx72M_5_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/A4MOnfWx72M_5_0.m4a",
      "fps": 29.97,
      "duration": 8.575241908575242,
      "num_frames": 257,
      "height": 1396,
      "width": 1404,
      "language": "English",
      "emotion": null,
      "index": "A4MOnfWx72M_5_0",
      "caption": "a close-up of a person wearing a military helmet adorned with insignia, standing against a backdrop of an American flag, suggesting a patriotic or historical context. The individual appears to be dressed in formal military attire, indicating a ceremonial or commemorative event.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/A4MOnfWx72M_5_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_-lPQTvZIogI_228_0": {
      "video_path": "celebv-text/video_resampled/-lPQTvZIogI_228_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/-lPQTvZIogI_228_0.m4a",
      "fps": 60.0,
      "duration": 5.266666666666667,
      "num_frames": 316,
      "height": 994,
      "width": 1084,
      "language": "English",
      "emotion": null,
      "index": "-lPQTvZIogI_228_0",
      "caption": "a man with gray hair and glasses, wearing a light blue shirt, speaking directly to the camera in what appears to be an interview or documentary setting. The background is dimly lit, focusing attention on his face as he delivers his message.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/-lPQTvZIogI_228_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_mkAERwE3rpE_5_0": {
      "video_path": "celebv-text/video_resampled/mkAERwE3rpE_5_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/mkAERwE3rpE_5_0.m4a",
      "fps": 30.0,
      "duration": 6.066666666666666,
      "num_frames": 182,
      "height": 1082,
      "width": 1180,
      "language": "English",
      "emotion": null,
      "index": "mkAERwE3rpE_5_0",
      "caption": "a man with short dark hair and a beard, wearing a black shirt, speaking to the camera while standing outdoors on a street with buildings and other people visible in the background.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/mkAERwE3rpE_5_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_pnUbjHr7jbU_25_0": {
      "video_path": "celebv-text/video_resampled/pnUbjHr7jbU_25_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/pnUbjHr7jbU_25_0.m4a",
      "fps": 29.97,
      "duration": 6.5732399065732405,
      "num_frames": 197,
      "height": 1088,
      "width": 1090,
      "language": "English",
      "emotion": null,
      "index": "pnUbjHr7jbU_25_0",
      "caption": "a close-up shot of a person with their hair tied back, wearing a denim jacket and small earrings, set against a blurred background that includes some greenery and flowers. The lighting is soft, highlighting the subject's face and creating a calm, introspective atmosphere.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/pnUbjHr7jbU_25_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_WgdlBiTsl8E_0_1": {
      "video_path": "celebv-text/video_resampled/WgdlBiTsl8E_0_1.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/WgdlBiTsl8E_0_1.m4a",
      "fps": 29.97,
      "duration": 8.041374708041374,
      "num_frames": 241,
      "height": 1110,
      "width": 1212,
      "language": "English",
      "emotion": null,
      "index": "WgdlBiTsl8E_0_1",
      "caption": "a person with glasses and an earring, speaking directly to the camera against a backdrop of shelves filled with books and decorative items. The individual appears to be engaged in a conversation or presentation, as they gesture slightly while talking.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/WgdlBiTsl8E_0_1.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_YSpF9PfDDmw_7_0": {
      "video_path": "celebv-text/video_resampled/YSpF9PfDDmw_7_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/YSpF9PfDDmw_7_0.m4a",
      "fps": 29.97,
      "duration": 5.638972305638973,
      "num_frames": 169,
      "height": 694,
      "width": 758,
      "language": "English",
      "emotion": null,
      "index": "YSpF9PfDDmw_7_0",
      "caption": "a person speaking directly to the camera against a vibrant green background adorned with sun motifs, creating a lively and engaging atmosphere.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/YSpF9PfDDmw_7_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_PV-W5iG6yXQ_30_1": {
      "video_path": "celebv-text/video_resampled/PV-W5iG6yXQ_30_1.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/PV-W5iG6yXQ_30_1.m4a",
      "fps": 25.0,
      "duration": 10.08,
      "num_frames": 252,
      "height": 708,
      "width": 772,
      "language": "English",
      "emotion": null,
      "index": "PV-W5iG6yXQ_30_1",
      "caption": "a man speaking directly to the camera, with a background image of a surfer riding a wave. The man appears to be explaining or discussing something related to surfing or water sports.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/PV-W5iG6yXQ_30_1.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_4bxRsuIaFTY_38_0": {
      "video_path": "celebv-text/video_resampled/4bxRsuIaFTY_38_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/4bxRsuIaFTY_38_0.m4a",
      "fps": 29.97,
      "duration": 17.083750417083753,
      "num_frames": 512,
      "height": 1138,
      "width": 1220,
      "language": "English",
      "emotion": null,
      "index": "4bxRsuIaFTY_38_0",
      "caption": "a young girl with long dark hair, wearing a yellow top and a decorative hairpin, sitting on a couch against a dark background. She appears to be engaged in a conversation or interview, as she is looking slightly to her left with a thoughtful expression.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/4bxRsuIaFTY_38_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_VprUqqG5VKY_0_4": {
      "video_path": "celebv-text/video_resampled/VprUqqG5VKY_0_4.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/VprUqqG5VKY_0_4.m4a",
      "fps": 25.0,
      "duration": 10.12,
      "num_frames": 253,
      "height": 1082,
      "width": 1136,
      "language": "English",
      "emotion": null,
      "index": "VprUqqG5VKY_0_4",
      "caption": "a man with a neatly styled beard and hair, dressed in a black shirt, set against a warm-toned background that includes curtains and a lamp, creating an intimate and focused atmosphere.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/VprUqqG5VKY_0_4.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_ASWSXPFADBg_20_0": {
      "video_path": "celebv-text/video_resampled/ASWSXPFADBg_20_0.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/ASWSXPFADBg_20_0.m4a",
      "fps": 25.0,
      "duration": 8.12,
      "num_frames": 203,
      "height": 1130,
      "width": 1156,
      "language": "English",
      "emotion": null,
      "index": "ASWSXPFADBg_20_0",
      "caption": "a woman with blonde, wavy hair speaking at what appears to be a promotional event or interview, as indicated by the movie poster in the background.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/ASWSXPFADBg_20_0.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_3D-PNIFlteM_10_1": {
      "video_path": "celebv-text/video_resampled/3D-PNIFlteM_10_1.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/3D-PNIFlteM_10_1.m4a",
      "fps": 29.97,
      "duration": 8.742075408742076,
      "num_frames": 262,
      "height": 630,
      "width": 750,
      "language": "English",
      "emotion": null,
      "index": "3D-PNIFlteM_10_1",
      "caption": "a person demonstrating or discussing a product, holding a small green packet labeled \"Dolce Polvere,\" while seated in a cozy setting with string lights and a white brick wall in the background.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/3D-PNIFlteM_10_1.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_fgqCKLwNkLY_1_4": {
      "video_path": "celebv-text/video_resampled/fgqCKLwNkLY_1_4.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/fgqCKLwNkLY_1_4.m4a",
      "fps": 30.0,
      "duration": 11.433333333333334,
      "num_frames": 343,
      "height": 1054,
      "width": 1148,
      "language": "English",
      "emotion": null,
      "index": "fgqCKLwNkLY_1_4",
      "caption": "a person with blue hair and glasses, wearing headphones, speaking directly to the camera in a room illuminated by colorful lighting. The background includes shelves filled with various items, suggesting a personal or gaming space.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/fgqCKLwNkLY_1_4.npy",
      "source_dataset": "celebv-text"
    },
    "celebv-text_4u8tidzYmzM_2_2": {
      "video_path": "celebv-text/video_resampled/4u8tidzYmzM_2_2.mp4",
      "audio_path": "celebv-text/audio/celebvtext_audio/4u8tidzYmzM_2_2.m4a",
      "fps": 30.0,
      "duration": 8.766666666666667,
      "num_frames": 263,
      "height": 866,
      "width": 944,
      "language": "English",
      "emotion": null,
      "index": "4u8tidzYmzM_2_2",
      "caption": "a person with red hair, wearing a pink top, speaking directly to the camera against a backdrop of a white door and a light-colored wall. The individual appears to be engaged in a conversation or presentation, as they are looking towards the camera and seem to be speaking.",
      "sr": null,
      "keypoint_path": "celebv-text/keypoint308/4u8tidzYmzM_2_2.npy",
      "source_dataset": "celebv-text"
    }
  }
}