{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Howto100M\n",
    "maybe not a good fit for caption choice."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = \"HowTo100M\"\n",
    "data_file_name = 'caption.json'\n",
    "\n",
    "data_dir = os.path.abspath(os.path.join(os.path.abspath(\"__file__\"), f\"../../../data/{dataset}\"))\n",
    "assert (os.path.exists(data_dir))\n",
    "\n",
    "data_path = os.path.join(data_dir, data_file_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = json.load(open(data_path, 'r'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "vid = 'nVbIUDjzWY4'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "datapoint = data[vid]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['folks roylott v twins',\n",
       " 'aids pink project',\n",
       " 'primed pieces guide coded',\n",
       " 'block times got',\n",
       " 'nice surfaces paint job',\n",
       " 'walmart preparing',\n",
       " 'get ready exterior paint job',\n",
       " 'inside tunnel tank',\n",
       " 'inside fender',\n",
       " 'care rear',\n",
       " 'fenders line',\n",
       " 'camera',\n",
       " 'primed anti',\n",
       " 'corrosive primer took red scuff pad',\n",
       " 'scuff pad inside',\n",
       " 'area',\n",
       " 'welded brackets fender',\n",
       " 'little seam sealer',\n",
       " 'pieces',\n",
       " 'tape paper',\n",
       " 'outside flat',\n",
       " 'block inside',\n",
       " 'paint job black',\n",
       " 'clean',\n",
       " 'get into spending',\n",
       " 'lot time putting base coat clear',\n",
       " 'coat inside',\n",
       " \"fender nobody's\",\n",
       " 'sense',\n",
       " 'tape',\n",
       " 'trim block got nice',\n",
       " 'semi gloss blacked',\n",
       " 'inside',\n",
       " 'inside tunnel',\n",
       " 'tape get',\n",
       " 'stuff primed',\n",
       " 'setup get things get garage prep',\n",
       " 'get jigs',\n",
       " 'get defenders',\n",
       " 'tank mounted get ready paint',\n",
       " 'point come',\n",
       " 'black base',\n",
       " 'coat folks',\n",
       " 'parts sprayed',\n",
       " 'inside pieces trim',\n",
       " 'black gives satin black',\n",
       " 'finish nice quick',\n",
       " 'cleans paint',\n",
       " 'gloss black outside',\n",
       " 'visible',\n",
       " 'under',\n",
       " 'acceptable grab camera',\n",
       " \"what's\",\n",
       " 'connell',\n",
       " 'inside fender',\n",
       " 'spread seam',\n",
       " 'sealer welding',\n",
       " 'cleans air',\n",
       " 'fender',\n",
       " 'blacked inside',\n",
       " 'dry pretty good prep',\n",
       " 'garage get get ready',\n",
       " 'items paint',\n",
       " 'fixtures stands get',\n",
       " 'painted',\n",
       " 'set come',\n",
       " 'go paint process',\n",
       " 'get base coat',\n",
       " 'got',\n",
       " 'garage blown',\n",
       " 'wet floor edges',\n",
       " 'walls nooks crannies',\n",
       " 'hose seal dust',\n",
       " 'took bench covered',\n",
       " 'masking paper fixtures',\n",
       " 'fenders couple',\n",
       " 'masking paper',\n",
       " 'taken fenders got',\n",
       " 'wall mounted',\n",
       " 'minute precaution',\n",
       " 'took red scuff pad went quickly',\n",
       " 'fenders thing',\n",
       " 'compressed air',\n",
       " 'mate',\n",
       " 'air hose blow',\n",
       " 'fixture fenders little',\n",
       " 'nooks crannies make',\n",
       " 'blow dust',\n",
       " 'little water',\n",
       " \"what's\",\n",
       " 'anthon things nature',\n",
       " 'get squared away',\n",
       " 'wash pretty cleaner',\n",
       " 'point pretty ready',\n",
       " 'go get gun set pink ball',\n",
       " 'set base',\n",
       " 'tape folks get blowing',\n",
       " 'washed pre cleanup',\n",
       " 'looked good good',\n",
       " 'go got garage set',\n",
       " 'closed lights',\n",
       " 'turn exhaust fan got gun',\n",
       " 'set got face coat',\n",
       " 'little close',\n",
       " 'got',\n",
       " \"here's tank got set\",\n",
       " 'jig past',\n",
       " 'works good',\n",
       " '2x4 attach',\n",
       " 'screws end sit',\n",
       " 'end',\n",
       " 'tank',\n",
       " 'flip get',\n",
       " 'good',\n",
       " 'flip',\n",
       " 'seesaws forth got',\n",
       " 'set got fender',\n",
       " 'jig got',\n",
       " 'fender jig got',\n",
       " 'spray gun got base coat',\n",
       " 'mixed base coat uh',\n",
       " 'sherwin williams dimension mixin',\n",
       " 'mixing ratio parts base',\n",
       " 'reducer',\n",
       " 'medium coats',\n",
       " 'surface tap',\n",
       " 'pieces get fan gun',\n",
       " 'get paint strained into gun',\n",
       " 'coat base',\n",
       " 'idea cope',\n",
       " 'cover piece color',\n",
       " 'coat got',\n",
       " 'able',\n",
       " 'panic',\n",
       " 'flash depending',\n",
       " 'environment mean worried',\n",
       " 'seventy eighty degrees',\n",
       " 'pretty perfect flash',\n",
       " 'minutes',\n",
       " 'notice little matte flat',\n",
       " 'finish get',\n",
       " 'eye finish cup',\n",
       " 'finished',\n",
       " 'application wait 10 15 minutes',\n",
       " 'coat coats',\n",
       " 'covered',\n",
       " 'think coat',\n",
       " 'recommend',\n",
       " 'flash go',\n",
       " 'edges sides undersides',\n",
       " 'make areas',\n",
       " 'paint',\n",
       " 'clear',\n",
       " 'start process',\n",
       " 'minutes',\n",
       " 'check areas covered',\n",
       " 'gun dust areas',\n",
       " 'make big difference',\n",
       " 'happened painter',\n",
       " 'paint got',\n",
       " 'little',\n",
       " 'said',\n",
       " \"didn't hit\",\n",
       " 'certain angle',\n",
       " 'sitting paint',\n",
       " 'screwed',\n",
       " 'gotta sand thing',\n",
       " 'paint get caught',\n",
       " 'make check edges',\n",
       " 'apply base coat',\n",
       " 'come talk',\n",
       " 'clear coat']"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datapoint['text']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MSR-VTT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "env: OPENAI_API_KEY=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\n"
     ]
    }
   ],
   "source": [
    "%env OPENAI_API_KEY=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"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "dataset = \"MSRVTT\"\n",
    "data_file_name = 'MSR_VTT.json'\n",
    "cache_file_name = f\"{dataset}_gpt_cache.json\"\n",
    "        \n",
    "data_dir = os.path.abspath(os.path.join(os.path.abspath(\"__file__\"), f\"../../../data/{dataset}\"))\n",
    "assert (os.path.exists(data_dir))\n",
    "\n",
    "data_nl_dir = os.path.join(data_dir, 'nl2spec')\n",
    "if not os.path.exists(data_nl_dir):\n",
    "        os.mkdir(data_nl_dir)\n",
    "        \n",
    "data_path = os.path.join(data_dir, \"annotation\", data_file_name)\n",
    "cache_path = os.path.join(data_nl_dir, cache_file_name)\n",
    "assert (os.path.exists(data_path))\n",
    "\n",
    "data = json.load(open(data_path, 'r'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "msr_vtt_datapoints = random.sample(data[\"annotations\"], k=5)\n",
    "msr_vtt_caption_ls = [i['caption'] for i in msr_vtt_datapoints] \n",
    "del data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['a woman transfers dough to plastic wrap and adds the leftover pieces to the ball of dough',\n",
       " 'a contestant talking about beauty in a beauty pageant',\n",
       " 'a man is sitting',\n",
       " 'football and basketball vine clips',\n",
       " 'sexy brunette talking about her dog']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "msr_vtt_caption_ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from caption2spec_pvsg import caption2spec\n",
    "\n",
    "spec = caption2spec(msr_vtt_caption_ls, cache_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'a woman transfers dough to plastic wrap and adds the leftover pieces to the ball of dough': {'caption': 'a woman transfers dough to plastic wrap and adds the leftover pieces to the ball of dough',\n",
       "  'sequential descriptions': ['woman A transfer dough B to plastic wrap C',\n",
       "   'woman A add leftover pieces D to ball of dough E'],\n",
       "  'time stamps': {'1': {'description': ['woman A transfer dough B to plastic wrap C'],\n",
       "    'programmatic': ['binary(transfer, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, dough)',\n",
       "     'binary(to, B, C)',\n",
       "     'name(C, plastic wrap)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['woman A add leftover pieces D to ball of dough E'],\n",
       "    'programmatic': ['binary(add, A, D)',\n",
       "     'name(A, woman)',\n",
       "     'name(D, leftover pieces)',\n",
       "     'binary(to, D, E)',\n",
       "     'name(E, ball of dough)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'a contestant talking about beauty in a beauty pageant': {'caption': 'a contestant talking about beauty in a beauty pageant',\n",
       "  'sequential descriptions': ['contestant A talk about beauty'],\n",
       "  'time stamps': {'1': {'description': ['contestant A talk about beauty'],\n",
       "    'programmatic': ['unary(talk, A)', 'name(A, contestant)', 'about(beauty)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'a man is sitting': {'caption': 'a man is sitting',\n",
       "  'sequential descriptions': ['man A sit'],\n",
       "  'time stamps': {'1': {'description': ['man A sit'],\n",
       "    'programmatic': ['unary(sit, A)', 'name(A, man)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'football and basketball vine clips': {'caption': 'football and basketball vine clips',\n",
       "  'sequential descriptions': ['clip A of football', 'clip B of basketball'],\n",
       "  'time stamps': {'1': {'description': ['clip A of football'],\n",
       "    'programmatic': ['unary(clip, A)', 'name(A, football)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['clip B of basketball'],\n",
       "    'programmatic': ['unary(clip, B)', 'name(B, basketball)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'late'}}},\n",
       " 'sexy brunette talking about her dog': {'caption': 'sexy brunette talking about her dog',\n",
       "  'sequential descriptions': ['brunette A talk about dog B'],\n",
       "  'time stamps': {'1': {'description': ['brunette A talk about dog B'],\n",
       "    'programmatic': ['unary(talk, A)', 'name(A, brunette)', 'about(dog, B)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}}}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "spec"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Activity Net"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "dataset = \"activity_net\"\n",
    "data_file_name = 'train.json'\n",
    "cache_file_name = f\"{dataset}_gpt_cache.json\"\n",
    "\n",
    "data_dir = os.path.abspath(os.path.join(os.path.abspath(\"__file__\"), f\"../../../data/{dataset}\"))\n",
    "assert (os.path.exists(data_dir))\n",
    "\n",
    "data_nl_dir = os.path.join(data_dir, 'nl2spec')\n",
    "if not os.path.exists(data_nl_dir):\n",
    "    os.mkdir(data_nl_dir)\n",
    "        \n",
    "cache_path = os.path.join(data_nl_dir, cache_file_name)\n",
    "data_path = os.path.join(data_dir, data_file_name)\n",
    "assert (os.path.exists(data_path))\n",
    "\n",
    "data = json.load(open(data_path, 'r'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "vid = 'v_QOlSCBRmfWY'\n",
    "datapoint = data[vid]\n",
    "activity_net_caption_ls = datapoint['sentences']\n",
    "del data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['A young woman is seen standing in a room and leads into her dancing.',\n",
       " ' The girl dances around the room while the camera captures her movements.',\n",
       " ' She continues dancing around the room and ends by laying on the floor.']"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "activity_net_caption_ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from caption2spec_pvsg import caption2spec\n",
    "\n",
    "activity_net_spec = caption2spec(activity_net_caption_ls, cache_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A young woman is seen standing in a room and leads into her dancing.': {'caption': 'A young woman is seen standing in a room and leads into her dancing.',\n",
       "  'sequential descriptions': ['young woman A stand in room', 'woman A dance'],\n",
       "  'time stamps': {'1': {'description': ['young woman A stand in room'],\n",
       "    'programmatic': ['unary(stand, A)',\n",
       "     'name(A, young woman)',\n",
       "     'location(A, room)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['woman A dance'],\n",
       "    'programmatic': ['unary(dance, A)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'The girl dances around the room while the camera captures her movements.': {'caption': 'The girl dances around the room while the camera captures her movements.',\n",
       "  'sequential descriptions': ['girl A dance around room',\n",
       "   'camera capture movement of A'],\n",
       "  'time stamps': {'1': {'description': ['girl A dance around room'],\n",
       "    'programmatic': ['unary(dance, A)',\n",
       "     'name(A, girl)',\n",
       "     'motion(around, A, room)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['camera capture movement of A'],\n",
       "    'programmatic': ['binary(capture, camera, movement)',\n",
       "     'target(movement, A)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'She continues dancing around the room and ends by laying on the floor.': {'caption': 'She continues dancing around the room and ends by laying on the floor.',\n",
       "  'sequential descriptions': ['she A dance around room', 'A lay on floor'],\n",
       "  'time stamps': {'1': {'description': ['she A dance around room'],\n",
       "    'programmatic': ['unary(dance, A)',\n",
       "     'name(A, she)',\n",
       "     'motion(around, A, room)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['A lay on floor'],\n",
       "    'programmatic': ['unary(lay, A)', 'location(A, floor)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}}}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "activity_net_spec"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### OpenPVSG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = \"open_pvsg\"\n",
    "cache_file_name = f\"{dataset}_gpt_cache.json\"\n",
    "data_file_name = 'pvsg.json'\n",
    "batch_size = 5\n",
    "\n",
    "data_dir = os.path.abspath(os.path.join(os.path.abspath(\"__file__\"), f\"../../../data/{dataset}\"))\n",
    "data_nl_dir = os.path.join(data_dir, 'nl2spec')\n",
    "assert (os.path.exists(data_dir))\n",
    "if not os.path.exists(data_nl_dir):\n",
    "    os.mkdir(data_nl_dir)\n",
    "\n",
    "cache_path = os.path.join(data_nl_dir, cache_file_name)\n",
    "data_path = os.path.join(data_dir, data_file_name)\n",
    "\n",
    "with open(data_path, 'r') as f:\n",
    "    anno = json.load(f)\n",
    "\n",
    "# See video id in anno['split'].\n",
    "data = {data_dict['video_id']: data_dict for data_dict in anno['data']}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "from caption2spec_pvsg import clean_cap\n",
    "ego4d_vid = anno['split']['ego4d']['train'][0]\n",
    "vidor_vid = anno['split']['vidor']['train'][0]\n",
    "epic_kitchen_vid = anno['split']['epic_kitchen']['train'][0]\n",
    "\n",
    "ego_4d_datapoint = data[ego4d_vid]\n",
    "vidor_datapoint = data[vidor_vid]\n",
    "epic_kitchen_datapoint = data[epic_kitchen_vid]\n",
    "\n",
    "ego_4d_captions = [clean_cap(i['description']) for i in ego_4d_datapoint['captions']]\n",
    "vidor_captions = [clean_cap(i['description']) for i in vidor_datapoint['captions']]\n",
    "epic_kitchen_captions = [clean_cap(i['description']) for i in epic_kitchen_datapoint['captions']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Ego4D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['I dipped a brush in paint and brushed the door.',\n",
       " 'I set down the brush and picked up another brush to continue brushing the door.',\n",
       " 'I set down the brush and picked up another brush to continue brushing the door.',\n",
       " 'I set down the brush and picked up another brush to continue brushing the door.',\n",
       " 'I set down the brush.']"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ego_4d_captions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "from caption2spec_pvsg import caption2spec\n",
    "\n",
    "ego_4d_spec = caption2spec(ego_4d_captions, cache_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A man carries a child and walks to the left from behind a woman holding another child.': {'caption': 'A man carries a child and walks to the left from behind a woman holding another child.',\n",
       "  'sequential descriptions': ['man A carry child B, women C hold child D, man A is behind women C',\n",
       "   'man A walk',\n",
       "   'man A at left'],\n",
       "  'time stamps': {'1': {'description': ['man A carry child B',\n",
       "     'women C hold child D',\n",
       "     'man A is behind women C'],\n",
       "    'programmatic': ['binary(carry, A, B)',\n",
       "     'name(A, man)',\n",
       "     'name(B, child)',\n",
       "     'binary(hold, C, D)',\n",
       "     'name(C, women)',\n",
       "     'name(D, child)',\n",
       "     'binary(behind, A, C)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A walk'],\n",
       "    'programmatic': ['unary(walk, A)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['man A at left'],\n",
       "    'programmatic': ['unary(left, A)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The woman rocks and holds the child, singing a birthday song together with another woman to celebrate the birthday of the girl.': {'caption': 'The woman rocks and holds the child, singing a birthday song together with another woman to celebrate the birthday of the girl.',\n",
       "  'sequential descriptions': ['woman A rocks and holds the child B, woman A and women C sings birthday song'],\n",
       "  'time stamps': {'1': {'description': ['woman A rocks and holds the child B, woman A and women C sings birthday song'],\n",
       "    'programmatic': ['binary(rock, A, B)',\n",
       "     'binary(hold, A, B)',\n",
       "     'unary(sing, A)',\n",
       "     'unary(sing, C)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'I adjusted my cellphone and continued playing the ukulele.': {'caption': 'I adjusted my cellphone and continued playing the ukulele.',\n",
       "  'sequential descriptions': ['person A adjust cellphone B',\n",
       "   'person A play ukulele C'],\n",
       "  'time stamps': {'1': {'description': ['person A adjust cellphone B'],\n",
       "    'programmatic': ['binary(adjust, A, B)',\n",
       "     'name(A, person)',\n",
       "     'name(B, cellphone)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['person A play ukulele C'],\n",
       "    'programmatic': ['binary(play, A, C)',\n",
       "     'name(A, person)',\n",
       "     'name(C, ukulele)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'late'}}},\n",
       " 'The woman smiles and holds the child in front of the cake.': {'caption': 'The woman smiles and holds the child in front of the cake.',\n",
       "  'sequential descriptions': ['woman A smiles and holds child B in front of cake C'],\n",
       "  'time stamps': {'1': {'description': ['woman A smiles and holds child B in front of cake C'],\n",
       "    'programmatic': ['unary(smile, A)',\n",
       "     'binary(hold, A, B)',\n",
       "     'binary(in_front_of, B, C)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, child)',\n",
       "     'name(C, cake)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " \"The camera shifts to the right, showing a woman, a man, a woman holding a baby, two men, and a woman holding a boy, all singing a birthday song to celebrate the girl's birthday.\": {'caption': \"The camera shifts to the right, showing a woman, a man, a woman holding a baby, two men, and a woman holding a boy, all singing a birthday song to celebrate the girl's birthday.\",\n",
       "  'sequential descriptions': ['camera shift right',\n",
       "   'woman A, man B, woman C hold baby D, man E, man F, woman G hold boy H sings birthday song'],\n",
       "  'time stamps': {'1': {'description': ['camera shift right'],\n",
       "    'programmatic': ['unary(shift_right, camera)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['woman A, man B, woman C hold baby D, man E, man F, woman G hold boy H sings birthday song'],\n",
       "    'programmatic': ['binary(hold, C, D)',\n",
       "     'binary(hold, G, H)',\n",
       "     'unary(sing, A)',\n",
       "     'unary(sing, B)',\n",
       "     'unary(sing, C)',\n",
       "     'unary(sing, E)',\n",
       "     'unary(sing, F)',\n",
       "     'unary(sing, G)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, man)',\n",
       "     'name(C, woman)',\n",
       "     'name(D, baby)',\n",
       "     'name(E, man)',\n",
       "     'name(F, man)',\n",
       "     'name(G, woman)',\n",
       "     'name(H, boy)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " \"The woman on the left side, holding the child, and the man and woman are singing a birthday song to celebrate the girl's birthday.\": {'caption': \"The woman on the left side, holding the child, and the man and woman are singing a birthday song to celebrate the girl's birthday.\",\n",
       "  'sequential descriptions': ['woman A on left side holding child B, man C and woman D sing birthday song'],\n",
       "  'time stamps': {'1': {'description': ['woman A on left side holding child B, man C and woman D sing birthday song'],\n",
       "    'programmatic': ['unary(left, A)',\n",
       "     'binary(hold, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, child)',\n",
       "     'unary(sing, C)',\n",
       "     'unary(sing, D)',\n",
       "     'name(C, man)',\n",
       "     'name(D, woman)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'The girl and the woman blow out the candles on the cake together and laugh happily.': {'caption': 'The girl and the woman blow out the candles on the cake together and laugh happily.',\n",
       "  'sequential descriptions': ['girl A and woman B blow out candles on cake C',\n",
       "   'girl A and woman B laugh'],\n",
       "  'time stamps': {'1': {'description': ['girl A and woman B blow out candles on cake C'],\n",
       "    'programmatic': ['binary(blow_out, A, C)',\n",
       "     'binary(blow_out, B, C)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, woman)',\n",
       "     'name(C, cake)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['girl A and woman B laugh'],\n",
       "    'programmatic': ['unary(laugh, A)', 'unary(laugh, B)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The woman picks up the candles.': {'caption': 'The woman picks up the candles.',\n",
       "  'sequential descriptions': ['woman A picks up candles B'],\n",
       "  'time stamps': {'1': {'description': ['woman A picks up candles B'],\n",
       "    'programmatic': ['binary(pick_up, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, candles)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'A woman is teasing a kitten with a piece of meat, and the kitten is peeking its head from a chair to look at the meat.': {'caption': 'A woman is teasing a kitten with a piece of meat, and the kitten is peeking its head from a chair to look at the meat.',\n",
       "  'sequential descriptions': ['woman A tease kitten B with meat C',\n",
       "   'kitten B peek head from chair to look at meat C'],\n",
       "  'time stamps': {'1': {'description': ['woman A tease kitten B with meat C'],\n",
       "    'programmatic': ['binary(tease, A, B)',\n",
       "     'binary(with, A, C)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, kitten)',\n",
       "     'name(C, meat)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten B peek head from chair to look at meat C'],\n",
       "    'programmatic': ['unary(peek, B)',\n",
       "     'binary(from, B, chair)',\n",
       "     'binary(look_at, B, C)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The camera zooms in on the kitten, and the kitten is looking at the camera.': {'caption': 'The camera zooms in on the kitten, and the kitten is looking at the camera.',\n",
       "  'sequential descriptions': ['camera A zoom in on kitten B',\n",
       "   'kitten B look at camera A'],\n",
       "  'time stamps': {'1': {'description': ['camera A zoom in on kitten B'],\n",
       "    'programmatic': ['binary(zoom_in_on, A, B)',\n",
       "     'name(A, camera)',\n",
       "     'name(B, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten B look at camera A'],\n",
       "    'programmatic': ['binary(look_at, B, A)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The woman is using a knife to prepare the meat, and the kitten is watching the meat.': {'caption': 'The woman is using a knife to prepare the meat, and the kitten is watching the meat.',\n",
       "  'sequential descriptions': ['woman A use knife to prepare meat B',\n",
       "   'kitten C watch meat B'],\n",
       "  'time stamps': {'1': {'description': ['woman A use knife to prepare meat B'],\n",
       "    'programmatic': ['binary(use, A, knife)',\n",
       "     'binary(prepare, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, meat)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten C watch meat B'],\n",
       "    'programmatic': ['binary(watch, C, B)', 'name(C, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The woman continues to use the knife to prepare the meat, and the kitten looks at the camera for a while and then looks at the meat.': {'caption': 'The woman continues to use the knife to prepare the meat, and the kitten looks at the camera for a while and then looks at the meat.',\n",
       "  'sequential descriptions': ['woman A use knife B to prepare meat C',\n",
       "   'kitten D looks at camera',\n",
       "   'kitten D looks at meat C'],\n",
       "  'time stamps': {'1': {'description': ['woman A use knife B to prepare meat C'],\n",
       "    'programmatic': ['binary(use, A, B)',\n",
       "     'binary(prepare, A, C)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, knife)',\n",
       "     'name(C, meat)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten D looks at camera',\n",
       "     'kitten D looks at meat C'],\n",
       "    'programmatic': ['unary(look, D)',\n",
       "     'binary(look, D, C)',\n",
       "     'name(D, kitten)',\n",
       "     'name(C, meat)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The little girl picks up a ball from the floor.': {'caption': 'The little girl picks up a ball from the floor.',\n",
       "  'sequential descriptions': ['girl A picks up ball B from floor'],\n",
       "  'time stamps': {'1': {'description': ['girl A picks up ball B from floor'],\n",
       "    'programmatic': ['binary(pick_up, A, B)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, ball)',\n",
       "     'unary(floor, B)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little girl throws a ball.': {'caption': 'The little girl throws a ball.',\n",
       "  'sequential descriptions': ['girl A throws ball B'],\n",
       "  'time stamps': {'1': {'description': ['girl A throws ball B'],\n",
       "    'programmatic': ['binary(throw, A, B)', 'name(A, girl)', 'name(B, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'An adult woman walks over and picks up the ball.': {'caption': 'An adult woman walks over and picks up the ball.',\n",
       "  'sequential descriptions': ['woman A walks over', 'woman A picks up ball B'],\n",
       "  'time stamps': {'1': {'description': ['woman A walks over'],\n",
       "    'programmatic': ['unary(walk_over, A)', 'name(A, woman)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['woman A picks up ball B'],\n",
       "    'programmatic': ['binary(pick_up, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The woman walks back, holding the ball.': {'caption': 'The woman walks back, holding the ball.',\n",
       "  'sequential descriptions': ['woman A walks back', 'woman A holds the ball'],\n",
       "  'time stamps': {'1': {'description': ['woman A walks back'],\n",
       "    'programmatic': ['unary(walk_back, A)', 'name(A, woman)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['woman A holds the ball'],\n",
       "    'programmatic': ['binary(hold, A, B)', 'name(A, woman)', 'name(B, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The little girl runs behind the woman.': {'caption': 'The little girl runs behind the woman.',\n",
       "  'sequential descriptions': ['girl A runs', 'girl A is behind woman B'],\n",
       "  'time stamps': {'1': {'description': ['girl A runs'],\n",
       "    'programmatic': ['unary(run, A)', 'name(A, girl)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['girl A is behind woman B'],\n",
       "    'programmatic': ['binary(behind, A, B)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, woman)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'Another little girl runs over and picks up a ball.': {'caption': 'Another little girl runs over and picks up a ball.',\n",
       "  'sequential descriptions': ['girl C runs over', 'girl C picks up ball D'],\n",
       "  'time stamps': {'1': {'description': ['girl C runs over'],\n",
       "    'programmatic': ['unary(run_over, C)', 'name(C, girl)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['girl C picks up ball D'],\n",
       "    'programmatic': ['binary(pick_up, C, D)',\n",
       "     'name(C, girl)',\n",
       "     'name(D, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The second little girl throws the ball forward.': {'caption': 'The second little girl throws the ball forward.',\n",
       "  'sequential descriptions': ['girl E throws ball F forward'],\n",
       "  'time stamps': {'1': {'description': ['girl E throws ball F forward'],\n",
       "    'programmatic': ['binary(throw_forward, E, F)',\n",
       "     'name(E, girl)',\n",
       "     'name(F, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The woman throws the ball toward the second little girl.': {'caption': 'The woman throws the ball toward the second little girl.',\n",
       "  'sequential descriptions': ['woman G throws ball H toward girl I'],\n",
       "  'time stamps': {'1': {'description': ['woman G throws ball H toward girl I'],\n",
       "    'programmatic': ['binary(throw_toward, G, H)',\n",
       "     'binary(toward, H, I)',\n",
       "     'name(G, woman)',\n",
       "     'name(H, ball)',\n",
       "     'name(I, girl)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'late'}}},\n",
       " 'The second little girl runs over to pick up the ball.': {'caption': 'The second little girl runs over to pick up the ball.',\n",
       "  'sequential descriptions': ['girl B run', 'girl B pick up ball C'],\n",
       "  'time stamps': {'1': {'description': ['girl B run'],\n",
       "    'programmatic': ['unary(run, B)', 'name(B, girl)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['girl B pick up ball C'],\n",
       "    'programmatic': ['binary(pick_up, B, C)',\n",
       "     'name(B, girl)',\n",
       "     'name(C, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The second little girl runs towards the woman from behind.': {'caption': 'The second little girl runs towards the woman from behind.',\n",
       "  'sequential descriptions': ['girl B run towards woman A from behind'],\n",
       "  'time stamps': {'1': {'description': ['girl B run towards woman A from behind'],\n",
       "    'programmatic': ['unary(run, B)',\n",
       "     'binary(towards, B, A)',\n",
       "     'binary(from_behind, B, A)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, girl)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The first little girl runs out from behind the woman.': {'caption': 'The first little girl runs out from behind the woman.',\n",
       "  'sequential descriptions': ['girl A run out from behind woman B'],\n",
       "  'time stamps': {'1': {'description': ['girl A run out from behind woman B'],\n",
       "    'programmatic': ['unary(run_out, A)',\n",
       "     'binary(from_behind, A, B)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, woman)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The first little girl runs towards the woman.': {'caption': 'The first little girl runs towards the woman.',\n",
       "  'sequential descriptions': ['girl A run towards woman B'],\n",
       "  'time stamps': {'1': {'description': ['girl A run towards woman B'],\n",
       "    'programmatic': ['unary(run, A)',\n",
       "     'binary(towards, A, B)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, woman)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The camera angle changes.': {'caption': 'The camera angle changes.',\n",
       "  'sequential descriptions': ['camera angle change'],\n",
       "  'time stamps': {'1': {'description': ['camera angle change'],\n",
       "    'programmatic': ['unary(change, camera_angle)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The first little girl is running.': {'caption': 'The first little girl is running.',\n",
       "  'sequential descriptions': ['girl A is running'],\n",
       "  'time stamps': {'1': {'description': ['girl A is running'],\n",
       "    'programmatic': ['unary(running, A)', 'name(A, girl)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The young boy walks through the television to pick up a gift.': {'caption': 'The young boy walks through the television to pick up a gift.',\n",
       "  'sequential descriptions': ['boy A walks through the television',\n",
       "   'boy A pick up gift B'],\n",
       "  'time stamps': {'1': {'description': ['boy A walks through the television'],\n",
       "    'programmatic': ['unary(walk, A)',\n",
       "     'name(A, boy)',\n",
       "     'binary(through, A, television)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy A pick up gift B'],\n",
       "    'programmatic': ['binary(pick_up, A, B)', 'name(A, boy)', 'name(B, gift)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy hands the gift to a woman, who appears to be his mother.': {'caption': 'The young boy hands the gift to a woman, who appears to be his mother.',\n",
       "  'sequential descriptions': ['boy A hands gift B to woman C'],\n",
       "  'time stamps': {'1': {'description': ['boy A hands gift B to woman C'],\n",
       "    'programmatic': ['binary(hands, A, B)',\n",
       "     'binary(to, B, C)',\n",
       "     'name(A, boy)',\n",
       "     'name(B, gift)',\n",
       "     'name(C, woman)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The woman sets the gift aside.': {'caption': 'The woman sets the gift aside.',\n",
       "  'sequential descriptions': ['woman A sets gift B aside'],\n",
       "  'time stamps': {'1': {'description': ['woman A sets gift B aside'],\n",
       "    'programmatic': ['binary(sets_aside, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, gift)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The young boy walks towards the camera.': {'caption': 'The young boy walks towards the camera.',\n",
       "  'sequential descriptions': ['boy A walks towards the camera'],\n",
       "  'time stamps': {'1': {'description': ['boy A walks towards the camera'],\n",
       "    'programmatic': ['unary(walk, A)',\n",
       "     'binary(towards, A, camera)',\n",
       "     'name(A, boy)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy receives another gift and sits on the floor.': {'caption': 'The young boy receives another gift and sits on the floor.',\n",
       "  'sequential descriptions': ['boy A receive gift B', 'boy A sit on floor'],\n",
       "  'time stamps': {'1': {'description': ['boy A receive gift B'],\n",
       "    'programmatic': ['binary(receive, A, B)', 'name(A, boy)', 'name(B, gift)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy A sit on floor'],\n",
       "    'programmatic': ['unary(sit, A)', 'location(A, floor)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy unwraps the gift and takes out a box.': {'caption': 'The young boy unwraps the gift and takes out a box.',\n",
       "  'sequential descriptions': ['boy A unwrap gift B',\n",
       "   'boy A take out box C from gift B'],\n",
       "  'time stamps': {'1': {'description': ['boy A unwrap gift B'],\n",
       "    'programmatic': ['binary(unwrap, A, B)', 'name(A, boy)', 'name(B, gift)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy A take out box C from gift B'],\n",
       "    'programmatic': ['binary(take_out, A, C)',\n",
       "     'binary(from, C, B)',\n",
       "     'name(A, boy)',\n",
       "     'name(B, gift)',\n",
       "     'name(C, box)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy holds the box and shows it to the surroundings.': {'caption': 'The young boy holds the box and shows it to the surroundings.',\n",
       "  'sequential descriptions': ['boy A hold box B',\n",
       "   'boy A show box B to surroundings'],\n",
       "  'time stamps': {'1': {'description': ['boy A hold box B'],\n",
       "    'programmatic': ['binary(hold, A, B)', 'name(A, boy)', 'name(B, box)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy A show box B to surroundings'],\n",
       "    'programmatic': ['binary(show, A, B)',\n",
       "     'location(B, surroundings)',\n",
       "     'name(A, boy)',\n",
       "     'name(B, box)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy sits on the carpet.': {'caption': 'The young boy sits on the carpet.',\n",
       "  'sequential descriptions': ['boy A sit on carpet'],\n",
       "  'time stamps': {'1': {'description': ['boy A sit on carpet'],\n",
       "    'programmatic': ['unary(sit, A)', 'location(A, carpet)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'An adult is helping the baby to stand up.': {'caption': 'An adult is helping the baby to stand up.',\n",
       "  'sequential descriptions': ['adult A help baby B stand up'],\n",
       "  'time stamps': {'1': {'description': ['adult A help baby B stand up'],\n",
       "    'programmatic': ['binary(help, A, B)',\n",
       "     'unary(stand_up, B)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, baby)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The baby stands up, and the adult lowers their head, letting the baby give them a kiss on the face.': {'caption': 'The baby stands up, and the adult lowers their head, letting the baby give them a kiss on the face.',\n",
       "  'sequential descriptions': ['baby A stand up',\n",
       "   'adult B lower head',\n",
       "   'baby A give kiss to adult B on face'],\n",
       "  'time stamps': {'1': {'description': ['baby A stand up'],\n",
       "    'programmatic': ['unary(stand, A)', 'name(A, baby)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['adult B lower head'],\n",
       "    'programmatic': ['unary(lower_head, B)', 'name(B, adult)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['baby A give kiss to adult B on face'],\n",
       "    'programmatic': ['binary(kiss, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, adult)',\n",
       "     'unary(face, B)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The adult smiles and picks up the baby onto their lap.': {'caption': 'The adult smiles and picks up the baby onto their lap.',\n",
       "  'sequential descriptions': ['adult A smiles',\n",
       "   'adult A picks up baby B onto lap'],\n",
       "  'time stamps': {'1': {'description': ['adult A smiles'],\n",
       "    'programmatic': ['unary(smile, A)', 'name(A, adult)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['adult A picks up baby B onto lap'],\n",
       "    'programmatic': ['binary(pick_up, A, B)',\n",
       "     'binary(onto, B, lap_of_A)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, baby)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The adult and the baby smile and look at the camera.': {'caption': 'The adult and the baby smile and look at the camera.',\n",
       "  'sequential descriptions': ['adult A and baby B smile',\n",
       "   'adult A and baby B look at camera'],\n",
       "  'time stamps': {'1': {'description': ['adult A and baby B smile'],\n",
       "    'programmatic': ['unary(smile, A)',\n",
       "     'unary(smile, B)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, baby)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['adult A and baby B look at camera'],\n",
       "    'programmatic': ['binary(look_at, A, camera)',\n",
       "     'binary(look_at, B, camera)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, baby)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'A man is teasing a kitten with a camera, and the kitten jumps up to grab the camera.': {'caption': 'A man is teasing a kitten with a camera, and the kitten jumps up to grab the camera.',\n",
       "  'sequential descriptions': ['man A tease kitten B with camera',\n",
       "   'kitten B jumps up to grab camera'],\n",
       "  'time stamps': {'1': {'description': ['man A tease kitten B with camera'],\n",
       "    'programmatic': ['binary(tease_with, A, camera)',\n",
       "     'binary(tease, A, B)',\n",
       "     'name(A, man)',\n",
       "     'name(B, kitten)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten B jumps up to grab camera'],\n",
       "    'programmatic': ['unary(jump_up, B)',\n",
       "     'binary(grab, B, camera)',\n",
       "     'name(B, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The kitten falls off a mat, and the man helps it to get back up on the mat.': {'caption': 'The kitten falls off a mat, and the man helps it to get back up on the mat.',\n",
       "  'sequential descriptions': ['kitten A falls off mat',\n",
       "   'man B helps kitten A get back up on mat'],\n",
       "  'time stamps': {'1': {'description': ['kitten A falls off mat'],\n",
       "    'programmatic': ['unary(fall_off, A)',\n",
       "     'binary(fall_off_from, A, mat)',\n",
       "     'name(A, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man B helps kitten A get back up on mat'],\n",
       "    'programmatic': ['binary(help, B, A)',\n",
       "     'binary(get_back_up_on, A, mat)',\n",
       "     'name(A, kitten)',\n",
       "     'name(B, man)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'late'}}},\n",
       " 'The man continues to tease the kitten with the camera, and the kitten jumps up to grab the camera.': {'caption': 'The man continues to tease the kitten with the camera, and the kitten jumps up to grab the camera.',\n",
       "  'sequential descriptions': ['man A tease kitten B with camera C',\n",
       "   'kitten B jumps up',\n",
       "   'kitten B grab camera C'],\n",
       "  'time stamps': {'1': {'description': ['man A tease kitten B with camera C'],\n",
       "    'programmatic': ['binary(tease, A, B)',\n",
       "     'with(camera, C)',\n",
       "     'name(A, man)',\n",
       "     'name(B, kitten)',\n",
       "     'name(C, camera)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten B jumps up'],\n",
       "    'programmatic': ['unary(jump_up, B)', 'name(B, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['kitten B grab camera C'],\n",
       "    'programmatic': ['binary(grab, B, C)',\n",
       "     'name(B, kitten)',\n",
       "     'name(C, camera)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The man stands and examines the camera, and the kitten looks at the camera.': {'caption': 'The man stands and examines the camera, and the kitten looks at the camera.',\n",
       "  'sequential descriptions': ['man A stands',\n",
       "   'man A examines camera B',\n",
       "   'kitten C looks at camera B'],\n",
       "  'time stamps': {'1': {'description': ['man A stands'],\n",
       "    'programmatic': ['unary(stand, A)', 'name(A, man)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A examines camera B'],\n",
       "    'programmatic': ['binary(examine, A, B)',\n",
       "     'name(A, man)',\n",
       "     'name(B, camera)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['kitten C looks at camera B'],\n",
       "    'programmatic': ['binary(look_at, C, B)',\n",
       "     'name(C, kitten)',\n",
       "     'name(B, camera)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'A woman is playing the guitar, and a little girl dances around the woman as she plays.': {'caption': 'A woman is playing the guitar, and a little girl dances around the woman as she plays.',\n",
       "  'sequential descriptions': ['woman A plays guitar B',\n",
       "   'little girl C dances around woman A'],\n",
       "  'time stamps': {'1': {'description': ['woman A plays guitar B'],\n",
       "    'programmatic': ['binary(play, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, guitar)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['little girl C dances around woman A'],\n",
       "    'programmatic': ['binary(dance_around, C, A)',\n",
       "     'name(C, little_girl)',\n",
       "     'name(A, woman)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'The woman continues playing the guitar on her own.': {'caption': 'The woman continues playing the guitar on her own.',\n",
       "  'sequential descriptions': ['woman A plays guitar B on her own'],\n",
       "  'time stamps': {'1': {'description': ['woman A plays guitar B on her own'],\n",
       "    'programmatic': ['binary(play, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, guitar)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " \"The little girl brings a piece of paper and dances along with the sound of the woman's guitar.\": {'caption': \"The little girl brings a piece of paper and dances along with the sound of the woman's guitar.\",\n",
       "  'sequential descriptions': ['little girl A brings piece of paper B',\n",
       "   \"little girl A dances along with sound of woman C's guitar\"],\n",
       "  'time stamps': {'1': {'description': ['little girl A brings piece of paper B'],\n",
       "    'programmatic': ['binary(bring, A, B)',\n",
       "     'name(A, little_girl)',\n",
       "     'name(B, piece_of_paper)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': [\"little girl A dances along with sound of woman C's guitar\"],\n",
       "    'programmatic': [\"binary(dance_along_with, A, sound_of_C's_guitar)\",\n",
       "     'name(A, little_girl)',\n",
       "     'name(C, woman)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'An elderly person is speaking to three other adults.': {'caption': 'An elderly person is speaking to three other adults.',\n",
       "  'sequential descriptions': ['elderly person A speak to adults B, C, D'],\n",
       "  'time stamps': {'1': {'description': ['elderly person A speak to adults B, C, D'],\n",
       "    'programmatic': ['binary(speak, A, B)',\n",
       "     'binary(speak, A, C)',\n",
       "     'binary(speak, A, D)',\n",
       "     'name(A, elderly person)',\n",
       "     'name(B, adult)',\n",
       "     'name(C, adult)',\n",
       "     'name(D, adult)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'A woman is flipping through a book.': {'caption': 'A woman is flipping through a book.',\n",
       "  'sequential descriptions': ['woman A flip through book B'],\n",
       "  'time stamps': {'1': {'description': ['woman A flip through book B'],\n",
       "    'programmatic': ['binary(flip through, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, book)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'An adult waves to call a little boy over, and the little boy walks up to one of the adults.': {'caption': 'An adult waves to call a little boy over, and the little boy walks up to one of the adults.',\n",
       "  'sequential descriptions': ['adult A wave to call boy B',\n",
       "   'boy B walk up to adult A'],\n",
       "  'time stamps': {'1': {'description': ['adult A wave to call boy B'],\n",
       "    'programmatic': ['binary(wave to call, A, B)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, boy)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy B walk up to adult A'],\n",
       "    'programmatic': ['binary(walk up to, B, A)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, boy)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The dog stands up from the sofa.': {'caption': 'The dog stands up from the sofa.',\n",
       "  'sequential descriptions': ['dog A stand up from sofa B'],\n",
       "  'time stamps': {'1': {'description': ['dog A stand up from sofa B'],\n",
       "    'programmatic': ['binary(stand up from, A, B)',\n",
       "     'name(A, dog)',\n",
       "     'name(B, sofa)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The dog touches the man with its nose.': {'caption': 'The dog touches the man with its nose.',\n",
       "  'sequential descriptions': ['dog A touch man B with nose'],\n",
       "  'time stamps': {'1': {'description': ['dog A touch man B with nose'],\n",
       "    'programmatic': ['binary(touch with nose, A, B)',\n",
       "     'name(A, dog)',\n",
       "     'name(B, man)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The dog scratches the man with its front paws.': {'caption': 'The dog scratches the man with its front paws.',\n",
       "  'sequential descriptions': ['dog A scratch man B with front paws'],\n",
       "  'time stamps': {'1': {'description': ['dog A scratch man B with front paws'],\n",
       "    'programmatic': ['binary(scratch, A, B)',\n",
       "     'name(A, dog)',\n",
       "     'name(B, man)',\n",
       "     'unary(front_paws, A)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The man sitting on the sofa turns around and plays with the dog.': {'caption': 'The man sitting on the sofa turns around and plays with the dog.',\n",
       "  'sequential descriptions': ['man A sitting on sofa',\n",
       "   'man A turns around',\n",
       "   'man A plays with dog B'],\n",
       "  'time stamps': {'1': {'description': ['man A sitting on sofa'],\n",
       "    'programmatic': ['binary(sit_on, A, B)', 'name(A, man)', 'name(B, sofa)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A turns around'],\n",
       "    'programmatic': ['unary(turn_around, A)', 'name(A, man)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['man A plays with dog B'],\n",
       "    'programmatic': ['binary(play_with, A, B)',\n",
       "     'name(A, man)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'late'}}},\n",
       " 'The little baby is lying on a towel and touching a big dog.': {'caption': 'The little baby is lying on a towel and touching a big dog.',\n",
       "  'sequential descriptions': ['baby A lying on towel',\n",
       "   'baby A touching dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A lying on towel'],\n",
       "    'programmatic': ['binary(lying_on, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, towel)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['baby A touching dog B'],\n",
       "    'programmatic': ['binary(touching, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The big dog nuzzles the face of the little baby.': {'caption': 'The big dog nuzzles the face of the little baby.',\n",
       "  'sequential descriptions': ['dog A nuzzles face of baby B'],\n",
       "  'time stamps': {'1': {'description': ['dog A nuzzles face of baby B'],\n",
       "    'programmatic': ['binary(nuzzles, A, B)',\n",
       "     'name(A, dog)',\n",
       "     'name(B, baby)',\n",
       "     'unary(face, B)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little baby gazes at the big dog.': {'caption': 'The little baby gazes at the big dog.',\n",
       "  'sequential descriptions': ['baby A gazes at dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A gazes at dog B'],\n",
       "    'programmatic': ['binary(gazes_at, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little baby touches the nose of the big dog.': {'caption': 'The little baby touches the nose of the big dog.',\n",
       "  'sequential descriptions': ['baby A touch nose of dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A touch nose of dog B'],\n",
       "    'programmatic': ['binary(touch, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'binary(nose, B, dog)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little baby continues to gaze at the big dog.': {'caption': 'The little baby continues to gaze at the big dog.',\n",
       "  'sequential descriptions': ['baby A gaze at dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A gaze at dog B'],\n",
       "    'programmatic': ['binary(gaze, A, B)', 'name(A, baby)', 'name(B, dog)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The little baby uses hands to touch the big dog.': {'caption': 'The little baby uses hands to touch the big dog.',\n",
       "  'sequential descriptions': ['baby A use hands touch dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A use hands touch dog B'],\n",
       "    'programmatic': ['binary(use, A, hands)',\n",
       "     'binary(touch, hands, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little baby and the big dog lie next to each other on the towel.': {'caption': 'The little baby and the big dog lie next to each other on the towel.',\n",
       "  'sequential descriptions': ['baby A and dog B lie next on towel C'],\n",
       "  'time stamps': {'1': {'description': ['baby A and dog B lie next on towel C'],\n",
       "    'programmatic': ['binary(lie, A, C)',\n",
       "     'binary(lie, B, C)',\n",
       "     'binary(next, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, dog)',\n",
       "     'name(C, towel)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The big dog lies on the towel and kicks its legs.': {'caption': 'The big dog lies on the towel and kicks its legs.',\n",
       "  'sequential descriptions': ['dog A lie on towel B', 'dog A kick legs'],\n",
       "  'time stamps': {'1': {'description': ['dog A lie on towel B'],\n",
       "    'programmatic': ['binary(lie, A, B)', 'name(A, dog)', 'name(B, towel)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['dog A kick legs'],\n",
       "    'programmatic': ['unary(kick, A)', 'binary(legs, A, dog)', 'name(A, dog)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'I dipped a brush in paint and brushed the door.': {'caption': 'I dipped a brush in paint and brushed the door.',\n",
       "  'sequential descriptions': ['person A dip brush B in paint C',\n",
       "   'person A brush door D with brush B'],\n",
       "  'time stamps': {'1': {'description': ['person A dip brush B in paint C'],\n",
       "    'programmatic': ['binary(dip, B, C)',\n",
       "     'name(A, person)',\n",
       "     'name(B, brush)',\n",
       "     'name(C, paint)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['person A brush door D with brush B'],\n",
       "    'programmatic': ['binary(brush, A, D)',\n",
       "     'name(A, person)',\n",
       "     'name(B, brush)',\n",
       "     'name(D, door)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'I set down the brush and picked up another brush to continue brushing the door.': {'caption': 'I set down the brush and picked up another brush to continue brushing the door.',\n",
       "  'sequential descriptions': ['person A set down brush B',\n",
       "   'person A pick up brush C',\n",
       "   'person A continue brushing door D with brush C'],\n",
       "  'time stamps': {'1': {'description': ['person A set down brush B'],\n",
       "    'programmatic': ['unary(set_down, B)',\n",
       "     'name(A, person)',\n",
       "     'name(B, brush)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['person A pick up brush C'],\n",
       "    'programmatic': ['binary(pick_up, A, C)',\n",
       "     'name(A, person)',\n",
       "     'name(C, brush)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['person A continue brushing door D with brush C'],\n",
       "    'programmatic': ['binary(brush, A, D)',\n",
       "     'name(A, person)',\n",
       "     'name(C, brush)',\n",
       "     'name(D, door)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'late'}}},\n",
       " 'I set down the brush.': {'caption': 'I set down the brush.',\n",
       "  'sequential descriptions': ['person A set down brush B'],\n",
       "  'time stamps': {'1': {'description': ['person A set down brush B'],\n",
       "    'programmatic': ['unary(set_down, B)',\n",
       "     'name(A, person)',\n",
       "     'name(B, brush)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}}}"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ego_4d_spec"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### VidOR"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Display the sample VidOR captions, then convert them to structured specs with `caption2spec`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['A man carries a child and walks to the left from behind a woman holding another child.',\n",
       " 'The woman rocks and holds the child, singing a birthday song together with another woman to celebrate the birthday of the girl.',\n",
       " 'The woman smiles and holds the child in front of the cake.',\n",
       " \"The camera shifts to the right, showing a woman, a man, a woman holding a baby, two men, and a woman holding a boy, all singing a birthday song to celebrate the girl's birthday.\",\n",
       " \"The woman on the left side, holding the child, and the man and woman are singing a birthday song to celebrate the girl 's birthday.\",\n",
       " 'The girl and the woman blow out the candles on the cake together and laugh happily.',\n",
       " 'The woman picks up the candles.']"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vidor_captions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from caption2spec_pvsg import caption2spec\n",
    "\n",
    "vidor_spec = caption2spec(vidor_captions, cache_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vidor_spec"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### EPIC-KITCHENS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['The man opened the oven to check.',\n",
       " 'The man picked up a mat, wiped his hands, and placed it on the countertop.',\n",
       " 'The man picked up a plate from the stove and placed it in the cabinet.',\n",
       " 'The man picked up the mat, opened the oven, used the mat to take out the pizza wrapped in paper, placed it on the stove, and then used the mat to close the oven.',\n",
       " 'The man placed the mat back on the countertop, sprinkled toppings on the pizza, and threw the remaining items into the dustbin.',\n",
       " 'The man walked back to the countertop, picked up a spatula, and a scraper.',\n",
       " 'The man used the spatula and scraper to slice the pizza.',\n",
       " 'The man put down the scraper, picked up a plate from the table, and walked back to the stove.',\n",
       " 'The man used the spatula to lift a piece of pizza onto the plate.',\n",
       " 'The man used the spatula to lift another piece of pizza onto the plate.',\n",
       " 'The man held the plate and placed it on the mat.']"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "epic_kitchen_captions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "from caption2spec_pvsg import caption2spec\n",
    "\n",
    "epic_kitchen_spec = caption2spec(epic_kitchen_captions, cache_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A man carries a child and walks to the left from behind a woman holding another child.': {'caption': 'A man carries a child and walks to the left from behind a woman holding another child.',\n",
       "  'sequential descriptions': ['man A carry child B, women C hold child D, man A is behind women C',\n",
       "   'man A walk',\n",
       "   'man A at left'],\n",
       "  'time stamps': {'1': {'description': ['man A carry child B',\n",
       "     'women C hold child D',\n",
       "     'man A is behind women C'],\n",
       "    'programmatic': ['binary(carry, A, B)',\n",
       "     'name(A, man)',\n",
       "     'name(B, child)',\n",
       "     'binary(hold, C, D)',\n",
       "     'name(C, women)',\n",
       "     'name(D, child)',\n",
       "     'binary(behind, A, C)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A walk'],\n",
       "    'programmatic': ['unary(walk, A)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['man A at left'],\n",
       "    'programmatic': ['unary(left, A)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The woman rocks and holds the child, singing a birthday song together with another woman to celebrate the birthday of the girl.': {'caption': 'The woman rocks and holds the child, singing a birthday song together with another woman to celebrate the birthday of the girl.',\n",
       "  'sequential descriptions': ['woman A rocks and holds the child B, woman A and women C sings birthday song'],\n",
       "  'time stamps': {'1': {'description': ['woman A rocks and holds the child B, woman A and women C sings birthday song'],\n",
       "    'programmatic': ['binary(rock, A, B)',\n",
       "     'binary(hold, A, B)',\n",
       "     'unary(sing, A)',\n",
       "     'unary(sing, C)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'I adjusted my cellphone and continued playing the ukulele.': {'caption': 'I adjusted my cellphone and continued playing the ukulele.',\n",
       "  'sequential descriptions': ['person A adjust cellphone B',\n",
       "   'person A play ukulele C'],\n",
       "  'time stamps': {'1': {'description': ['person A adjust cellphone B'],\n",
       "    'programmatic': ['binary(adjust, A, B)',\n",
       "     'name(A, person)',\n",
       "     'name(B, cellphone)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['person A play ukulele C'],\n",
       "    'programmatic': ['binary(play, A, C)',\n",
       "     'name(A, person)',\n",
       "     'name(C, ukulele)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'late'}}},\n",
       " 'The woman smiles and holds the child in front of the cake.': {'caption': 'The woman smiles and holds the child in front of the cake.',\n",
       "  'sequential descriptions': ['woman A smiles and holds child B in front of cake C'],\n",
       "  'time stamps': {'1': {'description': ['woman A smiles and holds child B in front of cake C'],\n",
       "    'programmatic': ['unary(smile, A)',\n",
       "     'binary(hold, A, B)',\n",
       "     'binary(in_front_of, B, C)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, child)',\n",
       "     'name(C, cake)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " \"The camera shifts to the right, showing a woman, a man, a woman holding a baby, two men, and a woman holding a boy, all singing a birthday song to celebrate the girl's birthday.\": {'caption': \"The camera shifts to the right, showing a woman, a man, a woman holding a baby, two men, and a woman holding a boy, all singing a birthday song to celebrate the girl's birthday.\",\n",
       "  'sequential descriptions': ['camera shift right',\n",
       "   'woman A, man B, woman C hold baby D, man E, man F, woman G hold boy H sings birthday song'],\n",
       "  'time stamps': {'1': {'description': ['camera shift right'],\n",
       "    'programmatic': ['unary(shift_right, camera)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['woman A, man B, woman C hold baby D, man E, man F, woman G hold boy H sings birthday song'],\n",
       "    'programmatic': ['binary(hold, C, D)',\n",
       "     'binary(hold, G, H)',\n",
       "     'unary(sing, A)',\n",
       "     'unary(sing, B)',\n",
       "     'unary(sing, C)',\n",
       "     'unary(sing, E)',\n",
       "     'unary(sing, F)',\n",
       "     'unary(sing, G)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, man)',\n",
       "     'name(C, woman)',\n",
       "     'name(D, baby)',\n",
       "     'name(E, man)',\n",
       "     'name(F, man)',\n",
       "     'name(G, woman)',\n",
       "     'name(H, boy)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " \"The woman on the left side, holding the child, and the man and woman are singing a birthday song to celebrate the girl's birthday.\": {'caption': \"The woman on the left side, holding the child, and the man and woman are singing a birthday song to celebrate the girl's birthday.\",\n",
       "  'sequential descriptions': ['woman A on left side holding child B, man C and woman D sing birthday song'],\n",
       "  'time stamps': {'1': {'description': ['woman A on left side holding child B, man C and woman D sing birthday song'],\n",
       "    'programmatic': ['unary(left, A)',\n",
       "     'binary(hold, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, child)',\n",
       "     'unary(sing, C)',\n",
       "     'unary(sing, D)',\n",
       "     'name(C, man)',\n",
       "     'name(D, woman)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'The girl and the woman blow out the candles on the cake together and laugh happily.': {'caption': 'The girl and the woman blow out the candles on the cake together and laugh happily.',\n",
       "  'sequential descriptions': ['girl A and woman B blow out candles on cake C',\n",
       "   'girl A and woman B laugh'],\n",
       "  'time stamps': {'1': {'description': ['girl A and woman B blow out candles on cake C'],\n",
       "    'programmatic': ['binary(blow_out, A, C)',\n",
       "     'binary(blow_out, B, C)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, woman)',\n",
       "     'name(C, cake)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['girl A and woman B laugh'],\n",
       "    'programmatic': ['unary(laugh, A)', 'unary(laugh, B)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The woman picks up the candles.': {'caption': 'The woman picks up the candles.',\n",
       "  'sequential descriptions': ['woman A picks up candles B'],\n",
       "  'time stamps': {'1': {'description': ['woman A picks up candles B'],\n",
       "    'programmatic': ['binary(pick_up, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, candles)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'A woman is teasing a kitten with a piece of meat, and the kitten is peeking its head from a chair to look at the meat.': {'caption': 'A woman is teasing a kitten with a piece of meat, and the kitten is peeking its head from a chair to look at the meat.',\n",
       "  'sequential descriptions': ['woman A tease kitten B with meat C',\n",
       "   'kitten B peek head from chair to look at meat C'],\n",
       "  'time stamps': {'1': {'description': ['woman A tease kitten B with meat C'],\n",
       "    'programmatic': ['binary(tease, A, B)',\n",
       "     'binary(with, A, C)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, kitten)',\n",
       "     'name(C, meat)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten B peek head from chair to look at meat C'],\n",
       "    'programmatic': ['unary(peek, B)',\n",
       "     'binary(from, B, chair)',\n",
       "     'binary(look_at, B, C)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The camera zooms in on the kitten, and the kitten is looking at the camera.': {'caption': 'The camera zooms in on the kitten, and the kitten is looking at the camera.',\n",
       "  'sequential descriptions': ['camera A zoom in on kitten B',\n",
       "   'kitten B look at camera A'],\n",
       "  'time stamps': {'1': {'description': ['camera A zoom in on kitten B'],\n",
       "    'programmatic': ['binary(zoom_in_on, A, B)',\n",
       "     'name(A, camera)',\n",
       "     'name(B, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten B look at camera A'],\n",
       "    'programmatic': ['binary(look_at, B, A)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The woman is using a knife to prepare the meat, and the kitten is watching the meat.': {'caption': 'The woman is using a knife to prepare the meat, and the kitten is watching the meat.',\n",
       "  'sequential descriptions': ['woman A use knife to prepare meat B',\n",
       "   'kitten C watch meat B'],\n",
       "  'time stamps': {'1': {'description': ['woman A use knife to prepare meat B'],\n",
       "    'programmatic': ['binary(use, A, knife)',\n",
       "     'binary(prepare, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, meat)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten C watch meat B'],\n",
       "    'programmatic': ['binary(watch, C, B)', 'name(C, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The woman continues to use the knife to prepare the meat, and the kitten looks at the camera for a while and then looks at the meat.': {'caption': 'The woman continues to use the knife to prepare the meat, and the kitten looks at the camera for a while and then looks at the meat.',\n",
       "  'sequential descriptions': ['woman A use knife B to prepare meat C',\n",
       "   'kitten D looks at camera',\n",
       "   'kitten D looks at meat C'],\n",
       "  'time stamps': {'1': {'description': ['woman A use knife B to prepare meat C'],\n",
       "    'programmatic': ['binary(use, A, B)',\n",
       "     'binary(prepare, A, C)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, knife)',\n",
       "     'name(C, meat)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten D looks at camera',\n",
       "     'kitten D looks at meat C'],\n",
       "    'programmatic': ['unary(look, D)',\n",
       "     'binary(look, D, C)',\n",
       "     'name(D, kitten)',\n",
       "     'name(C, meat)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The little girl picks up a ball from the floor.': {'caption': 'The little girl picks up a ball from the floor.',\n",
       "  'sequential descriptions': ['girl A picks up ball B from floor'],\n",
       "  'time stamps': {'1': {'description': ['girl A picks up ball B from floor'],\n",
       "    'programmatic': ['binary(pick_up, A, B)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, ball)',\n",
       "     'unary(floor, B)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little girl throws a ball.': {'caption': 'The little girl throws a ball.',\n",
       "  'sequential descriptions': ['girl A throws ball B'],\n",
       "  'time stamps': {'1': {'description': ['girl A throws ball B'],\n",
       "    'programmatic': ['binary(throw, A, B)', 'name(A, girl)', 'name(B, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'An adult woman walks over and picks up the ball.': {'caption': 'An adult woman walks over and picks up the ball.',\n",
       "  'sequential descriptions': ['woman A walks over', 'woman A picks up ball B'],\n",
       "  'time stamps': {'1': {'description': ['woman A walks over'],\n",
       "    'programmatic': ['unary(walk_over, A)', 'name(A, woman)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['woman A picks up ball B'],\n",
       "    'programmatic': ['binary(pick_up, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The woman walks back, holding the ball.': {'caption': 'The woman walks back, holding the ball.',\n",
       "  'sequential descriptions': ['woman A walks back', 'woman A holds the ball'],\n",
       "  'time stamps': {'1': {'description': ['woman A walks back'],\n",
       "    'programmatic': ['unary(walk_back, A)', 'name(A, woman)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['woman A holds the ball'],\n",
       "    'programmatic': ['binary(hold, A, B)', 'name(A, woman)', 'name(B, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The little girl runs behind the woman.': {'caption': 'The little girl runs behind the woman.',\n",
       "  'sequential descriptions': ['girl A runs', 'girl A is behind woman B'],\n",
       "  'time stamps': {'1': {'description': ['girl A runs'],\n",
       "    'programmatic': ['unary(run, A)', 'name(A, girl)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['girl A is behind woman B'],\n",
       "    'programmatic': ['binary(behind, A, B)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, woman)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'Another little girl runs over and picks up a ball.': {'caption': 'Another little girl runs over and picks up a ball.',\n",
       "  'sequential descriptions': ['girl C runs over', 'girl C picks up ball D'],\n",
       "  'time stamps': {'1': {'description': ['girl C runs over'],\n",
       "    'programmatic': ['unary(run_over, C)', 'name(C, girl)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['girl C picks up ball D'],\n",
       "    'programmatic': ['binary(pick_up, C, D)',\n",
       "     'name(C, girl)',\n",
       "     'name(D, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The second little girl throws the ball forward.': {'caption': 'The second little girl throws the ball forward.',\n",
       "  'sequential descriptions': ['girl E throws ball F forward'],\n",
       "  'time stamps': {'1': {'description': ['girl E throws ball F forward'],\n",
       "    'programmatic': ['binary(throw_forward, E, F)',\n",
       "     'name(E, girl)',\n",
       "     'name(F, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The woman throws the ball toward the second little girl.': {'caption': 'The woman throws the ball toward the second little girl.',\n",
       "  'sequential descriptions': ['woman G throws ball H toward girl I'],\n",
       "  'time stamps': {'1': {'description': ['woman G throws ball H toward girl I'],\n",
       "    'programmatic': ['binary(throw_toward, G, H)',\n",
       "     'binary(toward, H, I)',\n",
       "     'name(G, woman)',\n",
       "     'name(H, ball)',\n",
       "     'name(I, girl)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'late'}}},\n",
       " 'The second little girl runs over to pick up the ball.': {'caption': 'The second little girl runs over to pick up the ball.',\n",
       "  'sequential descriptions': ['girl B run', 'girl B pick up ball C'],\n",
       "  'time stamps': {'1': {'description': ['girl B run'],\n",
       "    'programmatic': ['unary(run, B)', 'name(B, girl)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['girl B pick up ball C'],\n",
       "    'programmatic': ['binary(pick_up, B, C)',\n",
       "     'name(B, girl)',\n",
       "     'name(C, ball)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The second little girl runs towards the woman from behind.': {'caption': 'The second little girl runs towards the woman from behind.',\n",
       "  'sequential descriptions': ['girl B run towards woman A from behind'],\n",
       "  'time stamps': {'1': {'description': ['girl B run towards woman A from behind'],\n",
       "    'programmatic': ['unary(run, B)',\n",
       "     'binary(towards, B, A)',\n",
       "     'binary(from_behind, B, A)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, girl)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The first little girl runs out from behind the woman.': {'caption': 'The first little girl runs out from behind the woman.',\n",
       "  'sequential descriptions': ['girl A run out from behind woman B'],\n",
       "  'time stamps': {'1': {'description': ['girl A run out from behind woman B'],\n",
       "    'programmatic': ['unary(run_out, A)',\n",
       "     'binary(from_behind, A, B)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, woman)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The first little girl runs towards the woman.': {'caption': 'The first little girl runs towards the woman.',\n",
       "  'sequential descriptions': ['girl A run towards woman B'],\n",
       "  'time stamps': {'1': {'description': ['girl A run towards woman B'],\n",
       "    'programmatic': ['unary(run, A)',\n",
       "     'binary(towards, A, B)',\n",
       "     'name(A, girl)',\n",
       "     'name(B, woman)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The camera angle changes.': {'caption': 'The camera angle changes.',\n",
       "  'sequential descriptions': ['camera angle change'],\n",
       "  'time stamps': {'1': {'description': ['camera angle change'],\n",
       "    'programmatic': ['unary(change, camera_angle)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The first little girl is running.': {'caption': 'The first little girl is running.',\n",
       "  'sequential descriptions': ['girl A is running'],\n",
       "  'time stamps': {'1': {'description': ['girl A is running'],\n",
       "    'programmatic': ['unary(running, A)', 'name(A, girl)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The young boy walks through the television to pick up a gift.': {'caption': 'The young boy walks through the television to pick up a gift.',\n",
       "  'sequential descriptions': ['boy A walks through the television',\n",
       "   'boy A pick up gift B'],\n",
       "  'time stamps': {'1': {'description': ['boy A walks through the television'],\n",
       "    'programmatic': ['unary(walk, A)',\n",
       "     'name(A, boy)',\n",
       "     'binary(through, A, television)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy A pick up gift B'],\n",
       "    'programmatic': ['binary(pick_up, A, B)', 'name(A, boy)', 'name(B, gift)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy hands the gift to a woman, who appears to be his mother.': {'caption': 'The young boy hands the gift to a woman, who appears to be his mother.',\n",
       "  'sequential descriptions': ['boy A hands gift B to woman C'],\n",
       "  'time stamps': {'1': {'description': ['boy A hands gift B to woman C'],\n",
       "    'programmatic': ['binary(hands, A, B)',\n",
       "     'binary(to, B, C)',\n",
       "     'name(A, boy)',\n",
       "     'name(B, gift)',\n",
       "     'name(C, woman)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The woman sets the gift aside.': {'caption': 'The woman sets the gift aside.',\n",
       "  'sequential descriptions': ['woman A sets gift B aside'],\n",
       "  'time stamps': {'1': {'description': ['woman A sets gift B aside'],\n",
       "    'programmatic': ['binary(sets_aside, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, gift)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The young boy walks towards the camera.': {'caption': 'The young boy walks towards the camera.',\n",
       "  'sequential descriptions': ['boy A walks towards the camera'],\n",
       "  'time stamps': {'1': {'description': ['boy A walks towards the camera'],\n",
       "    'programmatic': ['unary(walk, A)',\n",
       "     'binary(towards, A, camera)',\n",
       "     'name(A, boy)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy receives another gift and sits on the floor.': {'caption': 'The young boy receives another gift and sits on the floor.',\n",
       "  'sequential descriptions': ['boy A receive gift B', 'boy A sit on floor'],\n",
       "  'time stamps': {'1': {'description': ['boy A receive gift B'],\n",
       "    'programmatic': ['binary(receive, A, B)', 'name(A, boy)', 'name(B, gift)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy A sit on floor'],\n",
       "    'programmatic': ['unary(sit, A)', 'location(A, floor)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy unwraps the gift and takes out a box.': {'caption': 'The young boy unwraps the gift and takes out a box.',\n",
       "  'sequential descriptions': ['boy A unwrap gift B',\n",
       "   'boy A take out box C from gift B'],\n",
       "  'time stamps': {'1': {'description': ['boy A unwrap gift B'],\n",
       "    'programmatic': ['binary(unwrap, A, B)', 'name(A, boy)', 'name(B, gift)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy A take out box C from gift B'],\n",
       "    'programmatic': ['binary(take_out, A, C)',\n",
       "     'binary(from, C, B)',\n",
       "     'name(A, boy)',\n",
       "     'name(B, gift)',\n",
       "     'name(C, box)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy holds the box and shows it to the surroundings.': {'caption': 'The young boy holds the box and shows it to the surroundings.',\n",
       "  'sequential descriptions': ['boy A hold box B',\n",
       "   'boy A show box B to surroundings'],\n",
       "  'time stamps': {'1': {'description': ['boy A hold box B'],\n",
       "    'programmatic': ['binary(hold, A, B)', 'name(A, boy)', 'name(B, box)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy A show box B to surroundings'],\n",
       "    'programmatic': ['binary(show, A, B)',\n",
       "     'location(B, surroundings)',\n",
       "     'name(A, boy)',\n",
       "     'name(B, box)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The young boy sits on the carpet.': {'caption': 'The young boy sits on the carpet.',\n",
       "  'sequential descriptions': ['boy A sit on carpet'],\n",
       "  'time stamps': {'1': {'description': ['boy A sit on carpet'],\n",
       "    'programmatic': ['unary(sit, A)', 'location(A, carpet)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'An adult is helping the baby to stand up.': {'caption': 'An adult is helping the baby to stand up.',\n",
       "  'sequential descriptions': ['adult A help baby B stand up'],\n",
       "  'time stamps': {'1': {'description': ['adult A help baby B stand up'],\n",
       "    'programmatic': ['binary(help, A, B)',\n",
       "     'unary(stand_up, B)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, baby)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The baby stands up, and the adult lowers their head, letting the baby give them a kiss on the face.': {'caption': 'The baby stands up, and the adult lowers their head, letting the baby give them a kiss on the face.',\n",
       "  'sequential descriptions': ['baby A stand up',\n",
       "   'adult B lower head',\n",
       "   'baby A give kiss to adult B on face'],\n",
       "  'time stamps': {'1': {'description': ['baby A stand up'],\n",
       "    'programmatic': ['unary(stand, A)', 'name(A, baby)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['adult B lower head'],\n",
       "    'programmatic': ['unary(lower_head, B)', 'name(B, adult)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['baby A give kiss to adult B on face'],\n",
       "    'programmatic': ['binary(kiss, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, adult)',\n",
       "     'unary(face, B)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The adult smiles and picks up the baby onto their lap.': {'caption': 'The adult smiles and picks up the baby onto their lap.',\n",
       "  'sequential descriptions': ['adult A smiles',\n",
       "   'adult A picks up baby B onto lap'],\n",
       "  'time stamps': {'1': {'description': ['adult A smiles'],\n",
       "    'programmatic': ['unary(smile, A)', 'name(A, adult)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['adult A picks up baby B onto lap'],\n",
       "    'programmatic': ['binary(pick_up, A, B)',\n",
       "     'binary(onto, B, lap_of_A)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, baby)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The adult and the baby smile and look at the camera.': {'caption': 'The adult and the baby smile and look at the camera.',\n",
       "  'sequential descriptions': ['adult A and baby B smile',\n",
       "   'adult A and baby B look at camera'],\n",
       "  'time stamps': {'1': {'description': ['adult A and baby B smile'],\n",
       "    'programmatic': ['unary(smile, A)',\n",
       "     'unary(smile, B)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, baby)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['adult A and baby B look at camera'],\n",
       "    'programmatic': ['binary(look_at, A, camera)',\n",
       "     'binary(look_at, B, camera)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, baby)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'A man is teasing a kitten with a camera, and the kitten jumps up to grab the camera.': {'caption': 'A man is teasing a kitten with a camera, and the kitten jumps up to grab the camera.',\n",
       "  'sequential descriptions': ['man A tease kitten B with camera',\n",
       "   'kitten B jumps up to grab camera'],\n",
       "  'time stamps': {'1': {'description': ['man A tease kitten B with camera'],\n",
       "    'programmatic': ['binary(tease_with, A, camera)',\n",
       "     'binary(tease, A, B)',\n",
       "     'name(A, man)',\n",
       "     'name(B, kitten)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten B jumps up to grab camera'],\n",
       "    'programmatic': ['unary(jump_up, B)',\n",
       "     'binary(grab, B, camera)',\n",
       "     'name(B, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The kitten falls off a mat, and the man helps it to get back up on the mat.': {'caption': 'The kitten falls off a mat, and the man helps it to get back up on the mat.',\n",
       "  'sequential descriptions': ['kitten A falls off mat',\n",
       "   'man B helps kitten A get back up on mat'],\n",
       "  'time stamps': {'1': {'description': ['kitten A falls off mat'],\n",
       "    'programmatic': ['unary(fall_off, A)',\n",
       "     'binary(fall_off_from, A, mat)',\n",
       "     'name(A, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man B helps kitten A get back up on mat'],\n",
       "    'programmatic': ['binary(help, B, A)',\n",
       "     'binary(get_back_up_on, A, mat)',\n",
       "     'name(A, kitten)',\n",
       "     'name(B, man)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'late'}}},\n",
       " 'The man continues to tease the kitten with the camera, and the kitten jumps up to grab the camera.': {'caption': 'The man continues to tease the kitten with the camera, and the kitten jumps up to grab the camera.',\n",
       "  'sequential descriptions': ['man A tease kitten B with camera C',\n",
       "   'kitten B jumps up',\n",
       "   'kitten B grab camera C'],\n",
       "  'time stamps': {'1': {'description': ['man A tease kitten B with camera C'],\n",
       "    'programmatic': ['binary(tease, A, B)',\n",
       "     'with(camera, C)',\n",
       "     'name(A, man)',\n",
       "     'name(B, kitten)',\n",
       "     'name(C, camera)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['kitten B jumps up'],\n",
       "    'programmatic': ['unary(jump_up, B)', 'name(B, kitten)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['kitten B grab camera C'],\n",
       "    'programmatic': ['binary(grab, B, C)',\n",
       "     'name(B, kitten)',\n",
       "     'name(C, camera)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The man stands and examines the camera, and the kitten looks at the camera.': {'caption': 'The man stands and examines the camera, and the kitten looks at the camera.',\n",
       "  'sequential descriptions': ['man A stands',\n",
       "   'man A examines camera B',\n",
       "   'kitten C looks at camera B'],\n",
       "  'time stamps': {'1': {'description': ['man A stands'],\n",
       "    'programmatic': ['unary(stand, A)', 'name(A, man)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A examines camera B'],\n",
       "    'programmatic': ['binary(examine, A, B)',\n",
       "     'name(A, man)',\n",
       "     'name(B, camera)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['kitten C looks at camera B'],\n",
       "    'programmatic': ['binary(look_at, C, B)',\n",
       "     'name(C, kitten)',\n",
       "     'name(B, camera)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'A woman is playing the guitar, and a little girl dances around the woman as she plays.': {'caption': 'A woman is playing the guitar, and a little girl dances around the woman as she plays.',\n",
       "  'sequential descriptions': ['woman A plays guitar B',\n",
       "   'little girl C dances around woman A'],\n",
       "  'time stamps': {'1': {'description': ['woman A plays guitar B'],\n",
       "    'programmatic': ['binary(play, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, guitar)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['little girl C dances around woman A'],\n",
       "    'programmatic': ['binary(dance_around, C, A)',\n",
       "     'name(C, little_girl)',\n",
       "     'name(A, woman)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'The woman continues playing the guitar on her own.': {'caption': 'The woman continues playing the guitar on her own.',\n",
       "  'sequential descriptions': ['woman A plays guitar B on her own'],\n",
       "  'time stamps': {'1': {'description': ['woman A plays guitar B on her own'],\n",
       "    'programmatic': ['binary(play, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, guitar)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " \"The little girl brings a piece of paper and dances along with the sound of the woman's guitar.\": {'caption': \"The little girl brings a piece of paper and dances along with the sound of the woman's guitar.\",\n",
       "  'sequential descriptions': ['little girl A brings piece of paper B',\n",
       "   \"little girl A dances along with sound of woman C's guitar\"],\n",
       "  'time stamps': {'1': {'description': ['little girl A brings piece of paper B'],\n",
       "    'programmatic': ['binary(bring, A, B)',\n",
       "     'name(A, little_girl)',\n",
       "     'name(B, piece_of_paper)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': [\"little girl A dances along with sound of woman C's guitar\"],\n",
       "    'programmatic': [\"binary(dance_along_with, A, sound_of_C's_guitar)\",\n",
       "     'name(A, little_girl)',\n",
       "     'name(C, woman)'],\n",
       "    'duration': 'long',\n",
       "    'video location': 'mid'}}},\n",
       " 'An elderly person is speaking to three other adults.': {'caption': 'An elderly person is speaking to three other adults.',\n",
       "  'sequential descriptions': ['elderly person A speak to adults B, C, D'],\n",
       "  'time stamps': {'1': {'description': ['elderly person A speak to adults B, C, D'],\n",
       "    'programmatic': ['binary(speak, A, B)',\n",
       "     'binary(speak, A, C)',\n",
       "     'binary(speak, A, D)',\n",
       "     'name(A, elderly person)',\n",
       "     'name(B, adult)',\n",
       "     'name(C, adult)',\n",
       "     'name(D, adult)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'A woman is flipping through a book.': {'caption': 'A woman is flipping through a book.',\n",
       "  'sequential descriptions': ['woman A flip through book B'],\n",
       "  'time stamps': {'1': {'description': ['woman A flip through book B'],\n",
       "    'programmatic': ['binary(flip through, A, B)',\n",
       "     'name(A, woman)',\n",
       "     'name(B, book)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'An adult waves to call a little boy over, and the little boy walks up to one of the adults.': {'caption': 'An adult waves to call a little boy over, and the little boy walks up to one of the adults.',\n",
       "  'sequential descriptions': ['adult A wave to call boy B',\n",
       "   'boy B walk up to adult A'],\n",
       "  'time stamps': {'1': {'description': ['adult A wave to call boy B'],\n",
       "    'programmatic': ['binary(wave to call, A, B)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, boy)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['boy B walk up to adult A'],\n",
       "    'programmatic': ['binary(walk up to, B, A)',\n",
       "     'name(A, adult)',\n",
       "     'name(B, boy)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The dog stands up from the sofa.': {'caption': 'The dog stands up from the sofa.',\n",
       "  'sequential descriptions': ['dog A stand up from sofa B'],\n",
       "  'time stamps': {'1': {'description': ['dog A stand up from sofa B'],\n",
       "    'programmatic': ['binary(stand up from, A, B)',\n",
       "     'name(A, dog)',\n",
       "     'name(B, sofa)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The dog touches the man with its nose.': {'caption': 'The dog touches the man with its nose.',\n",
       "  'sequential descriptions': ['dog A touch man B with nose'],\n",
       "  'time stamps': {'1': {'description': ['dog A touch man B with nose'],\n",
       "    'programmatic': ['binary(touch with nose, A, B)',\n",
       "     'name(A, dog)',\n",
       "     'name(B, man)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The dog scratches the man with its front paws.': {'caption': 'The dog scratches the man with its front paws.',\n",
       "  'sequential descriptions': ['dog A scratch man B with front paws'],\n",
       "  'time stamps': {'1': {'description': ['dog A scratch man B with front paws'],\n",
       "    'programmatic': ['binary(scratch, A, B)',\n",
       "     'name(A, dog)',\n",
       "     'name(B, man)',\n",
       "     'unary(front_paws, A)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The man sitting on the sofa turns around and plays with the dog.': {'caption': 'The man sitting on the sofa turns around and plays with the dog.',\n",
       "  'sequential descriptions': ['man A sitting on sofa',\n",
       "   'man A turns around',\n",
       "   'man A plays with dog B'],\n",
       "  'time stamps': {'1': {'description': ['man A sitting on sofa'],\n",
       "    'programmatic': ['binary(sit_on, A, B)', 'name(A, man)', 'name(B, sofa)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A turns around'],\n",
       "    'programmatic': ['unary(turn_around, A)', 'name(A, man)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['man A plays with dog B'],\n",
       "    'programmatic': ['binary(play_with, A, B)',\n",
       "     'name(A, man)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'late'}}},\n",
       " 'The little baby is lying on a towel and touching a big dog.': {'caption': 'The little baby is lying on a towel and touching a big dog.',\n",
       "  'sequential descriptions': ['baby A lying on towel',\n",
       "   'baby A touching dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A lying on towel'],\n",
       "    'programmatic': ['binary(lying_on, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, towel)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['baby A touching dog B'],\n",
       "    'programmatic': ['binary(touching, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The big dog nuzzles the face of the little baby.': {'caption': 'The big dog nuzzles the face of the little baby.',\n",
       "  'sequential descriptions': ['dog A nuzzles face of baby B'],\n",
       "  'time stamps': {'1': {'description': ['dog A nuzzles face of baby B'],\n",
       "    'programmatic': ['binary(nuzzles, A, B)',\n",
       "     'name(A, dog)',\n",
       "     'name(B, baby)',\n",
       "     'unary(face, B)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little baby gazes at the big dog.': {'caption': 'The little baby gazes at the big dog.',\n",
       "  'sequential descriptions': ['baby A gazes at dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A gazes at dog B'],\n",
       "    'programmatic': ['binary(gazes_at, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little baby touches the nose of the big dog.': {'caption': 'The little baby touches the nose of the big dog.',\n",
       "  'sequential descriptions': ['baby A touch nose of dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A touch nose of dog B'],\n",
       "    'programmatic': ['binary(touch, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'binary(nose, B, dog)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little baby continues to gaze at the big dog.': {'caption': 'The little baby continues to gaze at the big dog.',\n",
       "  'sequential descriptions': ['baby A gaze at dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A gaze at dog B'],\n",
       "    'programmatic': ['binary(gaze, A, B)', 'name(A, baby)', 'name(B, dog)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The little baby uses hands to touch the big dog.': {'caption': 'The little baby uses hands to touch the big dog.',\n",
       "  'sequential descriptions': ['baby A use hands touch dog B'],\n",
       "  'time stamps': {'1': {'description': ['baby A use hands touch dog B'],\n",
       "    'programmatic': ['binary(use, A, hands)',\n",
       "     'binary(touch, hands, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, dog)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The little baby and the big dog lie next to each other on the towel.': {'caption': 'The little baby and the big dog lie next to each other on the towel.',\n",
       "  'sequential descriptions': ['baby A and dog B lie next on towel C'],\n",
       "  'time stamps': {'1': {'description': ['baby A and dog B lie next on towel C'],\n",
       "    'programmatic': ['binary(lie, A, C)',\n",
       "     'binary(lie, B, C)',\n",
       "     'binary(next, A, B)',\n",
       "     'name(A, baby)',\n",
       "     'name(B, dog)',\n",
       "     'name(C, towel)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The big dog lies on the towel and kicks its legs.': {'caption': 'The big dog lies on the towel and kicks its legs.',\n",
       "  'sequential descriptions': ['dog A lie on towel B', 'dog A kick legs'],\n",
       "  'time stamps': {'1': {'description': ['dog A lie on towel B'],\n",
       "    'programmatic': ['binary(lie, A, B)', 'name(A, dog)', 'name(B, towel)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['dog A kick legs'],\n",
       "    'programmatic': ['unary(kick, A)', 'binary(legs, A, dog)', 'name(A, dog)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'I dipped a brush in paint and brushed the door.': {'caption': 'I dipped a brush in paint and brushed the door.',\n",
       "  'sequential descriptions': ['person A dip brush B in paint C',\n",
       "   'person A brush door D with brush B'],\n",
       "  'time stamps': {'1': {'description': ['person A dip brush B in paint C'],\n",
       "    'programmatic': ['binary(dip, B, C)',\n",
       "     'name(A, person)',\n",
       "     'name(B, brush)',\n",
       "     'name(C, paint)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['person A brush door D with brush B'],\n",
       "    'programmatic': ['binary(brush, A, D)',\n",
       "     'name(A, person)',\n",
       "     'name(B, brush)',\n",
       "     'name(D, door)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'I set down the brush and picked up another brush to continue brushing the door.': {'caption': 'I set down the brush and picked up another brush to continue brushing the door.',\n",
       "  'sequential descriptions': ['person A set down brush B',\n",
       "   'person A pick up brush C',\n",
       "   'person A continue brushing door D with brush C'],\n",
       "  'time stamps': {'1': {'description': ['person A set down brush B'],\n",
       "    'programmatic': ['unary(set_down, B)',\n",
       "     'name(A, person)',\n",
       "     'name(B, brush)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['person A pick up brush C'],\n",
       "    'programmatic': ['binary(pick_up, A, C)',\n",
       "     'name(A, person)',\n",
       "     'name(C, brush)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['person A continue brushing door D with brush C'],\n",
       "    'programmatic': ['binary(brush, A, D)',\n",
       "     'name(A, person)',\n",
       "     'name(C, brush)',\n",
       "     'name(D, door)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'late'}}},\n",
       " 'I set down the brush.': {'caption': 'I set down the brush.',\n",
       "  'sequential descriptions': ['person A set down brush B'],\n",
       "  'time stamps': {'1': {'description': ['person A set down brush B'],\n",
       "    'programmatic': ['unary(set_down, B)',\n",
       "     'name(A, person)',\n",
       "     'name(B, brush)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The man opened the oven to check.': {'caption': 'The man opened the oven to check.',\n",
       "  'sequential descriptions': ['man A open oven B, man A check'],\n",
       "  'time stamps': {'1': {'description': ['man A open oven B', 'man A check'],\n",
       "    'programmatic': ['binary(open, A, B)',\n",
       "     'name(A, man)',\n",
       "     'name(B, oven)',\n",
       "     'unary(check, A)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'}}},\n",
       " 'The man picked up a mat, wiped his hands, and placed it on the countertop.': {'caption': 'The man picked up a mat, wiped his hands, and placed it on the countertop.',\n",
       "  'sequential descriptions': ['man A pick up mat B',\n",
       "   'man A wipe hands',\n",
       "   'man A place mat B on countertop C'],\n",
       "  'time stamps': {'1': {'description': ['man A pick up mat B'],\n",
       "    'programmatic': ['binary(pick_up, A, B)', 'name(A, man)', 'name(B, mat)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A wipe hands'],\n",
       "    'programmatic': ['unary(wipe_hands, A)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['man A place mat B on countertop C'],\n",
       "    'programmatic': ['binary(place, A, B)',\n",
       "     'binary(on, B, C)',\n",
       "     'name(C, countertop)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The man picked up a plate from the stove and placed it in the cabinet.': {'caption': 'The man picked up a plate from the stove and placed it in the cabinet.',\n",
       "  'sequential descriptions': ['man A pick up plate B from stove C',\n",
       "   'man A place plate B in cabinet D'],\n",
       "  'time stamps': {'1': {'description': ['man A pick up plate B from stove C'],\n",
       "    'programmatic': ['binary(pick_up, A, B)',\n",
       "     'binary(from, B, C)',\n",
       "     'name(A, man)',\n",
       "     'name(B, plate)',\n",
       "     'name(C, stove)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A place plate B in cabinet D'],\n",
       "    'programmatic': ['binary(place, A, B)',\n",
       "     'binary(in, B, D)',\n",
       "     'name(D, cabinet)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The man picked up the mat, opened the oven, used the mat to take out the pizza wrapped in paper, placed it on the stove, and then used the mat to close the oven.': {'caption': 'The man picked up the mat, opened the oven, used the mat to take out the pizza wrapped in paper, placed it on the stove, and then used the mat to close the oven.',\n",
       "  'sequential descriptions': ['man A pick up mat B',\n",
       "   'man A open oven C',\n",
       "   'man A use mat B to take out pizza D wrapped in paper',\n",
       "   'man A place pizza D on stove E',\n",
       "   'man A use mat B to close oven C'],\n",
       "  'time stamps': {'1': {'description': ['man A pick up mat B'],\n",
       "    'programmatic': ['binary(pick_up, A, B)', 'name(A, man)', 'name(B, mat)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A open oven C'],\n",
       "    'programmatic': ['binary(open, A, C)', 'name(C, oven)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '3': {'description': ['man A use mat B to take out pizza D wrapped in paper'],\n",
       "    'programmatic': ['binary(use, A, B)',\n",
       "     'binary(take_out, A, D)',\n",
       "     'binary(wrapped_in, D, paper)',\n",
       "     'name(D, pizza)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'},\n",
       "   '4': {'description': ['man A place pizza D on stove E'],\n",
       "    'programmatic': ['binary(place, A, D)',\n",
       "     'binary(on, D, E)',\n",
       "     'name(E, stove)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'},\n",
       "   '5': {'description': ['man A use mat B to close oven C'],\n",
       "    'programmatic': ['binary(use, A, B)', 'binary(close, A, C)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The man placed the mat back on the countertop, sprinkled toppings on the pizza, and threw the remaining items into the dustbin.': {'caption': 'The man placed the mat back on the countertop, sprinkled toppings on the pizza, and threw the remaining items into the dustbin.',\n",
       "  'sequential descriptions': ['man A place mat B back on countertop C',\n",
       "   'man A sprinkle toppings on pizza D',\n",
       "   'man A throw remaining items into dustbin E'],\n",
       "  'time stamps': {'1': {'description': ['man A place mat B back on countertop C'],\n",
       "    'programmatic': ['binary(place_back, A, B)',\n",
       "     'binary(on, B, C)',\n",
       "     'name(C, countertop)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A sprinkle toppings on pizza D'],\n",
       "    'programmatic': ['binary(sprinkle, A, toppings)',\n",
       "     'binary(on, toppings, D)',\n",
       "     'name(D, pizza)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['man A throw remaining items into dustbin E'],\n",
       "    'programmatic': ['binary(throw, A, remaining_items)',\n",
       "     'binary(into, remaining_items, E)',\n",
       "     'name(E, dustbin)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The man walked back to the countertop, picked up a spatula, and a scraper.': {'caption': 'The man walked back to the countertop, picked up a spatula, and a scraper.',\n",
       "  'sequential descriptions': ['man A walk back to countertop B',\n",
       "   'man A pick up spatula C',\n",
       "   'man A pick up scraper D'],\n",
       "  'time stamps': {'1': {'description': ['man A walk back to countertop B'],\n",
       "    'programmatic': ['binary(walk_back, A, B)', 'name(B, countertop)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A pick up spatula C', 'man A pick up scraper D'],\n",
       "    'programmatic': ['binary(pick_up, A, C)',\n",
       "     'binary(pick_up, A, D)',\n",
       "     'name(C, spatula)',\n",
       "     'name(D, scraper)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The man used the spatula and scraper to slice the pizza.': {'caption': 'The man used the spatula and scraper to slice the pizza.',\n",
       "  'sequential descriptions': ['man A use spatula B and scraper C to slice pizza D'],\n",
       "  'time stamps': {'1': {'description': ['man A use spatula B and scraper C to slice pizza D'],\n",
       "    'programmatic': ['binary(use, A, B)',\n",
       "     'binary(use, A, C)',\n",
       "     'binary(slice, A, D)',\n",
       "     'name(B, spatula)',\n",
       "     'name(C, scraper)',\n",
       "     'name(D, pizza)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'}}},\n",
       " 'The man put down the scraper, picked up a plate from the table, and walked back to the stove.': {'caption': 'The man put down the scraper, picked up a plate from the table, and walked back to the stove.',\n",
       "  'sequential descriptions': ['man A put down scraper B',\n",
       "   'man A pick up plate C from table D',\n",
       "   'man A walk back to stove E'],\n",
       "  'time stamps': {'1': {'description': ['man A put down scraper B'],\n",
       "    'programmatic': ['binary(put_down, A, B)', 'name(B, scraper)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A pick up plate C from table D'],\n",
       "    'programmatic': ['binary(pick_up, A, C)',\n",
       "     'binary(from, C, D)',\n",
       "     'name(C, plate)',\n",
       "     'name(D, table)'],\n",
       "    'duration': 'medium',\n",
       "    'video location': 'mid'},\n",
       "   '3': {'description': ['man A walk back to stove E'],\n",
       "    'programmatic': ['binary(walk_back, A, E)', 'name(E, stove)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}},\n",
       " 'The man used the spatula to lift a piece of pizza onto the plate.': {'caption': 'The man used the spatula to lift a piece of pizza onto the plate.',\n",
       "  'sequential descriptions': ['man A use spatula B to lift piece of pizza C onto plate D'],\n",
       "  'time stamps': {'1': {'description': ['man A use spatula B to lift piece of pizza C onto plate D'],\n",
       "    'programmatic': ['binary(use, A, B)',\n",
       "     'binary(lift, A, C)',\n",
       "     'binary(onto, C, D)',\n",
       "     'name(B, spatula)',\n",
       "     'name(C, piece_of_pizza)',\n",
       "     'name(D, plate)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The man used the spatula to lift another piece of pizza onto the plate.': {'caption': 'The man used the spatula to lift another piece of pizza onto the plate.',\n",
       "  'sequential descriptions': ['man A use spatula B to lift another piece of pizza C onto plate D'],\n",
       "  'time stamps': {'1': {'description': ['man A use spatula B to lift another piece of pizza C onto plate D'],\n",
       "    'programmatic': ['binary(use, A, B)',\n",
       "     'binary(lift, A, C)',\n",
       "     'binary(onto, C, D)',\n",
       "     'name(B, spatula)',\n",
       "     'name(C, another_piece_of_pizza)',\n",
       "     'name(D, plate)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'mid'}}},\n",
       " 'The man held the plate and placed it on the mat.': {'caption': 'The man held the plate and placed it on the mat.',\n",
       "  'sequential descriptions': ['man A hold plate B',\n",
       "   'man A place plate B on mat C'],\n",
       "  'time stamps': {'1': {'description': ['man A hold plate B'],\n",
       "    'programmatic': ['binary(hold, A, B)', 'name(A, man)', 'name(B, plate)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'early'},\n",
       "   '2': {'description': ['man A place plate B on mat C'],\n",
       "    'programmatic': ['binary(place, A, B)',\n",
       "     'binary(on, B, C)',\n",
       "     'name(C, mat)'],\n",
       "    'duration': 'short',\n",
       "    'video location': 'late'}}}}"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "epic_kitchen_spec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "laser",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
