# Invoke the sample script with a fixed model checkpoint path, one input
# video file name and one question string. All values are hard-coded here.
script=samples/video_blip_generate_action_narration.py
model_dir=pretrained_models/video-blip-flan-t5-xl-ego4d
# The file name contains spaces, so it must stay quoted wherever it is used.
video_file='clothes on bed_Merom_1_int_0_camera.mp4'
# Presumably the text prompt handed to the model -- confirm against the
# script's own argument parser.
question='What is shown in the video?'

# Flags first, then the two positional arguments (video, question).
python "$script" --device cuda --model "$model_dir" "$video_file" "$question"