# Shared string constants: each name below holds the literal string that the
# rest of the project should use instead of repeating the raw text.
# NOTE(review): presumably these are field/key names of a dataset sample;
# the grouping below is inferred from the names only -- confirm with callers.

# --- sample bookkeeping ---
# Index of the sample in the dataset.
SAMPLE_INDEX = "index"

# --- video ---
VIDEO_PATH = "video_path"
FPS = "fps"
DURATION = "duration"
NUM_FRAMES = "num_frames"
HEIGHT = "height"
WIDTH = "width"

# --- audio ---
AUDIO_PATH = "audio_path"
SR = "sr"

# --- derived images / regions ---
# Image of the foreground with a transparent background.
SEG_PATH = "seg_path"
FACE_BBOX = "face_bbox"

# --- text and labels ---
LANGUAGE = "language"
CAPTION = "caption"
EMOTION = "emotion"
ACTION = "action"
APPEARANCE = "appearance"
