{
  "000019": {
    "text": "person jogs around to the left and right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.153118554637944,
      "Minus Multimodal Distance": -7.132046699523926,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4389368263655342e-05,
      "MoBERT-F": 0.46603931884931193,
      "MoBERT-N": 0.6076797995888948,
      "MoBERT-min(F/N)": 0.46603931884931193,
      "MoBERT-max(F/N)": 0.6076797995888948,
      "MotionCritic": -8.339241027832031,
      "VeMo (human-opt view)": 0.9525909592061742,
      "VeMo (max entropy view)": 0.9525909592061742,
      "VeMo (min entropy view)": 0.9647812166488794,
      "VeMo (random view)": 0.9647812166488794,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jogs from side to side."
  },
  "000021": {
    "text": "person is walking normally in a circle",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.7092008513884531,
      "Minus Multimodal Distance": -11.223453521728516,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.331017458345741e-05,
      "MoBERT-F": 0.5030041767377458,
      "MoBERT-N": 0.5549354947390328,
      "MoBERT-min(F/N)": 0.5030041767377458,
      "MoBERT-max(F/N)": 0.5549354947390328,
      "MotionCritic": -7.12754487991333,
      "VeMo (human-opt view)": 0.7545304777594728,
      "VeMo (max entropy view)": 0.7545304777594728,
      "VeMo (min entropy view)": 0.7987987987987988,
      "VeMo (random view)": 0.7545304777594728,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking normally in a circle."
  },
  "000063": {
    "text": "a person is drying their right arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1564919739365336,
      "Minus Multimodal Distance": -11.041521072387695,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5731445930432528e-05,
      "MoBERT-F": 0.43815579212828326,
      "MoBERT-N": 0.43590212192031325,
      "MoBERT-min(F/N)": 0.43590212192031325,
      "MoBERT-max(F/N)": 0.43815579212828326,
      "MotionCritic": -11.233529090881348,
      "VeMo (human-opt view)": 0.06387789711701526,
      "VeMo (max entropy view)": 0.34814814814814815,
      "VeMo (min entropy view)": 0.06387789711701526,
      "VeMo (random view)": 0.06387789711701526,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is drying their right arm."
  },
  "000072": {
    "text": "a person throws and upper cut with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1174814998507467,
      "Minus Multimodal Distance": -7.0501580238342285,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0002149486099369824,
      "MoBERT-F": 0.5709659425366574,
      "MoBERT-N": 0.6112792557192052,
      "MoBERT-min(F/N)": 0.5709659425366574,
      "MoBERT-max(F/N)": 0.6112792557192052,
      "MotionCritic": -16.52629852294922,
      "VeMo (human-opt view)": 0.3923076923076923,
      "VeMo (max entropy view)": 0.45229681978798586,
      "VeMo (min entropy view)": 0.3923076923076923,
      "VeMo (random view)": 0.3923076923076923,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person throws an uppercut with his right hand."
  },
  "000076": {
    "text": "man reaches down to the left as to pick up item and then reaches to the right as if emptying item then replaces it to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7678287922547183,
      "Minus Multimodal Distance": -10.32143783569336,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2837601136416197e-05,
      "MoBERT-F": 0.43661286712515507,
      "MoBERT-N": 0.5826439287364463,
      "MoBERT-min(F/N)": 0.43661286712515507,
      "MoBERT-max(F/N)": 0.5826439287364463,
      "MotionCritic": -5.99828577041626,
      "VeMo (human-opt view)": 0.20225776105362184,
      "VeMo (max entropy view)": 0.48484848484848486,
      "VeMo (min entropy view)": 0.20225776105362184,
      "VeMo (random view)": 0.48484848484848486,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man reaches down to the left as if to pick up an item, then reaches to the right as if emptying the item, and then replaces it to the left."
  },
  "000085": {
    "text": "using their left hand, the person holds the neck of an air guitar, and with their right hand, they make strumming motions.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4822394597278662,
      "Minus Multimodal Distance": -5.621583461761475,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4871411369531415e-05,
      "MoBERT-F": 0.3024944995527922,
      "MoBERT-N": 0.4459982898837073,
      "MoBERT-min(F/N)": 0.3024944995527922,
      "MoBERT-max(F/N)": 0.4459982898837073,
      "MotionCritic": -5.062144756317139,
      "VeMo (human-opt view)": 0.06003210272873194,
      "VeMo (max entropy view)": 0.17301414581066377,
      "VeMo (min entropy view)": 0.06003210272873194,
      "VeMo (random view)": 0.17301414581066377,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Using their left hand, the person holds the neck of an air guitar, and with their right hand, they make strumming motions."
  },
  "000088": {
    "text": "someone puts both of their hands on their chests and appears to be laughing. then waves their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7247509035528585,
      "Minus Multimodal Distance": -7.120090007781982,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.367780871281866e-05,
      "MoBERT-F": 0.4068591870304932,
      "MoBERT-N": 0.48298588222706385,
      "MoBERT-min(F/N)": 0.4068591870304932,
      "MoBERT-max(F/N)": 0.48298588222706385,
      "MotionCritic": -3.0570545196533203,
      "VeMo (human-opt view)": 3.751383201072779e-05,
      "VeMo (max entropy view)": 3.751383201072779e-05,
      "VeMo (min entropy view)": 3.2059573888831455e-05,
      "VeMo (random view)": 3.2059573888831455e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone puts both hands on their chest and appears to be laughing. Then they wave their right hand."
  },
  "000091": {
    "text": "a person lays on the ground.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0982799045487444,
      "Minus Multimodal Distance": -15.759767532348633,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.03136738762259483,
      "MoBERT-F": 0.579187050531742,
      "MoBERT-N": 0.5629996226914162,
      "MoBERT-min(F/N)": 0.5629996226914162,
      "MoBERT-max(F/N)": 0.579187050531742,
      "MotionCritic": -10.255645751953125,
      "VeMo (human-opt view)": 0.9706307173808377,
      "VeMo (max entropy view)": 0.9706307173808377,
      "VeMo (min entropy view)": 0.9903211885827015,
      "VeMo (random view)": 0.9706307173808377,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lies on the ground."
  },
  "000104": {
    "text": "a man walks forward and takes two steps, turns clockwise and then goesni to a sitting position from standing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9214197351325129,
      "Minus Multimodal Distance": -3.8369767665863037,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.263983515149448e-05,
      "MoBERT-F": 0.4103200215702738,
      "MoBERT-N": 0.40530227086528775,
      "MoBERT-min(F/N)": 0.40530227086528775,
      "MoBERT-max(F/N)": 0.4103200215702738,
      "MotionCritic": -11.516014099121094,
      "VeMo (human-opt view)": 0.8175824175824176,
      "VeMo (max entropy view)": 0.8175824175824176,
      "VeMo (min entropy view)": 0.8671513549239921,
      "VeMo (random view)": 0.8671513549239921,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward and takes two steps, turns clockwise, and then goes into a sitting position from a standing position."
  },
  "000118": {
    "text": "person walks forwards slowly and normally without swinging arms",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7495759478643151,
      "Minus Multimodal Distance": -1.372260570526123,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.556778235884849e-05,
      "MoBERT-F": 0.47289192020961224,
      "MoBERT-N": 0.5796136572536355,
      "MoBERT-min(F/N)": 0.47289192020961224,
      "MoBERT-max(F/N)": 0.5796136572536355,
      "MotionCritic": -4.888526439666748,
      "VeMo (human-opt view)": 0.9100794646591385,
      "VeMo (max entropy view)": 0.8444040036396724,
      "VeMo (min entropy view)": 0.9100794646591385,
      "VeMo (random view)": 0.8444040036396724,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward slowly and normally without swinging their arms."
  },
  "000189": {
    "text": "a figure walking in a straight line, swinging their left arm far greater than the right arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.4531996099591797,
      "Minus Multimodal Distance": -4.038525104522705,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0019979653880000114,
      "MoBERT-F": 0.5235491034322932,
      "MoBERT-N": 0.556019584835543,
      "MoBERT-min(F/N)": 0.5235491034322932,
      "MoBERT-max(F/N)": 0.556019584835543,
      "MotionCritic": -12.024698257446289,
      "VeMo (human-opt view)": 0.056768558951965066,
      "VeMo (max entropy view)": 0.11905882352941176,
      "VeMo (min entropy view)": 0.056768558951965066,
      "VeMo (random view)": 0.11905882352941176,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure is walking in a straight line, swinging their left arm far more than their right arm."
  },
  "000246": {
    "text": "a person cautiously walks in an arc",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3206811331673443,
      "Minus Multimodal Distance": -9.040435791015625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.4433917701244354,
      "MoBERT-F": 0.6894355703178161,
      "MoBERT-N": 0.6752793571592342,
      "MoBERT-min(F/N)": 0.6752793571592342,
      "MoBERT-max(F/N)": 0.6894355703178161,
      "MotionCritic": -4.887399196624756,
      "VeMo (human-opt view)": 0.8929503916449086,
      "VeMo (max entropy view)": 0.8181818181818182,
      "VeMo (min entropy view)": 0.8929503916449086,
      "VeMo (random view)": 0.8929503916449086,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person cautiously walks in an arc."
  },
  "000268": {
    "text": "someone slouched in a sitting position gets up and begins walking in a circle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.620694150409964,
      "Minus Multimodal Distance": -9.789885520935059,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3009273718344048e-05,
      "MoBERT-F": 0.35284163050781403,
      "MoBERT-N": 0.3940295835924975,
      "MoBERT-min(F/N)": 0.35284163050781403,
      "MoBERT-max(F/N)": 0.3940295835924975,
      "MotionCritic": -15.99425983428955,
      "VeMo (human-opt view)": 0.9325952458482579,
      "VeMo (max entropy view)": 0.9324972334931759,
      "VeMo (min entropy view)": 0.9325952458482579,
      "VeMo (random view)": 0.9325952458482579,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone who was slouched in a sitting position gets up and begins walking in a circle."
  },
  "000274": {
    "text": "a man walks forward and raises both his arms and then drop his arms .",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9841567545275463,
      "Minus Multimodal Distance": -6.411865711212158,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 9.102624608203769e-05,
      "MoBERT-F": 0.47918275076753186,
      "MoBERT-N": 0.5936832291251629,
      "MoBERT-min(F/N)": 0.47918275076753186,
      "MoBERT-max(F/N)": 0.5936832291251629,
      "MotionCritic": -9.411417007446289,
      "VeMo (human-opt view)": 0.9940936531244902,
      "VeMo (max entropy view)": 0.9875761652290892,
      "VeMo (min entropy view)": 0.9940936531244902,
      "VeMo (random view)": 0.9940936531244902,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward, raises both his arms, and then drops his arms."
  },
  "000296": {
    "text": "someone working out the right arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9902622123746606,
      "Minus Multimodal Distance": -9.691346168518066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.450082502036821e-05,
      "MoBERT-F": 0.43066558995519955,
      "MoBERT-N": 0.5355908699383706,
      "MoBERT-min(F/N)": 0.43066558995519955,
      "MoBERT-max(F/N)": 0.5355908699383706,
      "MotionCritic": -8.15604305267334,
      "VeMo (human-opt view)": 0.5461847389558233,
      "VeMo (max entropy view)": 0.5461847389558233,
      "VeMo (min entropy view)": 0.851664984863774,
      "VeMo (random view)": 0.5461847389558233,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone is working out the right arm."
  },
  "000299": {
    "text": "a person walks backwards and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8586345747021028,
      "Minus Multimodal Distance": -9.564203262329102,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0023987358435988426,
      "MoBERT-F": 0.5724827865585567,
      "MoBERT-N": 0.5631676163255961,
      "MoBERT-min(F/N)": 0.5631676163255961,
      "MoBERT-max(F/N)": 0.5724827865585567,
      "MotionCritic": -5.746487140655518,
      "VeMo (human-opt view)": 0.9497716894977168,
      "VeMo (max entropy view)": 0.9497716894977168,
      "VeMo (min entropy view)": 0.9924491967769726,
      "VeMo (random view)": 0.9924491967769726,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks backwards and then stops."
  },
  "000302": {
    "text": "a person goes into a ducking position like they are shielding themselves from something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0628170049984453,
      "Minus Multimodal Distance": -7.442766189575195,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0002373581228312105,
      "MoBERT-F": 0.5574371393166836,
      "MoBERT-N": 0.47509025234150337,
      "MoBERT-min(F/N)": 0.47509025234150337,
      "MoBERT-max(F/N)": 0.5574371393166836,
      "MotionCritic": -9.07675552368164,
      "VeMo (human-opt view)": 0.003377110694183865,
      "VeMo (max entropy view)": 0.009735026755205984,
      "VeMo (min entropy view)": 0.003377110694183865,
      "VeMo (random view)": 0.003377110694183865,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person assumes a ducking position as if they are shielding themselves from something."
  },
  "000304": {
    "text": "a person is walking in a steady forward motion.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4019808850883324,
      "Minus Multimodal Distance": -10.541022300720215,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8599399924278259,
      "MoBERT-F": 0.840078380289788,
      "MoBERT-N": 0.8008326250133289,
      "MoBERT-min(F/N)": 0.8008326250133289,
      "MoBERT-max(F/N)": 0.840078380289788,
      "MotionCritic": -6.610739231109619,
      "VeMo (human-opt view)": 0.9951097178683386,
      "VeMo (max entropy view)": 0.995093635546956,
      "VeMo (min entropy view)": 0.9951097178683386,
      "VeMo (random view)": 0.9951097178683386,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking in a steady forward motion."
  },
  "000306": {
    "text": "a person picks up two objects and begins performing a motioning action with both objects.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9781331679549335,
      "Minus Multimodal Distance": -9.472319602966309,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3124876916408539,
      "MoBERT-F": 0.5046477128884069,
      "MoBERT-N": 0.4772211302409981,
      "MoBERT-min(F/N)": 0.4772211302409981,
      "MoBERT-max(F/N)": 0.5046477128884069,
      "MotionCritic": -8.063634872436523,
      "VeMo (human-opt view)": 1.3402810754992701e-05,
      "VeMo (max entropy view)": 0.0003139924065941148,
      "VeMo (min entropy view)": 1.3402810754992701e-05,
      "VeMo (random view)": 0.0003139924065941148,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks up two objects and begins performing a motioning action with both objects."
  },
  "000307": {
    "text": "the figure takes a few slighly hurried steps without raising their arms, it looks they are about to start running but haven't quite yet begun.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.882424207749826,
      "Minus Multimodal Distance": -6.030904769897461,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6865902327699587e-05,
      "MoBERT-F": 0.3992605442880406,
      "MoBERT-N": 0.5562038214056723,
      "MoBERT-min(F/N)": 0.3992605442880406,
      "MoBERT-max(F/N)": 0.5562038214056723,
      "MotionCritic": -13.968828201293945,
      "VeMo (human-opt view)": 3.84193034275114e-06,
      "VeMo (max entropy view)": 4.842772427509335e-05,
      "VeMo (min entropy view)": 3.84193034275114e-06,
      "VeMo (random view)": 3.84193034275114e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The figure takes a few slightly hurried steps without raising their arms. It looks as if they are about to start running but haven't quite begun yet."
  },
  "000324": {
    "text": "a person bounce sup and down on the balls of their feet.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8007230664322668,
      "Minus Multimodal Distance": -4.621706485748291,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8500686287879944,
      "MoBERT-F": 0.6321198787732634,
      "MoBERT-N": 0.5204769237341395,
      "MoBERT-min(F/N)": 0.5204769237341395,
      "MoBERT-max(F/N)": 0.6321198787732634,
      "MotionCritic": 1.234005093574524,
      "VeMo (human-opt view)": 0.7653213751868461,
      "VeMo (max entropy view)": 0.7653213751868461,
      "VeMo (min entropy view)": 0.7666666666666667,
      "VeMo (random view)": 0.7666666666666667,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bounces up and down on the balls of their feet."
  },
  "000337": {
    "text": "a person walks down stairs while holding a railing with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4383669135705164,
      "Minus Multimodal Distance": -4.439115047454834,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.711818499141373e-05,
      "MoBERT-F": 0.28673276774843515,
      "MoBERT-N": 0.4770312044414448,
      "MoBERT-min(F/N)": 0.28673276774843515,
      "MoBERT-max(F/N)": 0.4770312044414448,
      "MotionCritic": -7.782186031341553,
      "VeMo (human-opt view)": 1.2601627188906438e-05,
      "VeMo (max entropy view)": 1.4723176179935417e-05,
      "VeMo (min entropy view)": 1.2601627188906438e-05,
      "VeMo (random view)": 1.4723176179935417e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks down the stairs while holding the railing with his right hand."
  },
  "000344": {
    "text": "a person uses the left arm to demonstrate throwing an object in front of them",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2551961553003994,
      "Minus Multimodal Distance": -6.04672908782959,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.13851337134838104,
      "MoBERT-F": 0.6909693954461327,
      "MoBERT-N": 0.5163309343538276,
      "MoBERT-min(F/N)": 0.5163309343538276,
      "MoBERT-max(F/N)": 0.6909693954461327,
      "MotionCritic": -8.189400672912598,
      "VeMo (human-opt view)": 0.7431906614785992,
      "VeMo (max entropy view)": 0.7431906614785992,
      "VeMo (min entropy view)": 0.8171275646743978,
      "VeMo (random view)": 0.7431906614785992,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person uses their left arm to demonstrate throwing an object in front of them."
  },
  "000348": {
    "text": "a person kicks their right leg up twice while hopping between feet, then punches their right arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9084640088069535,
      "Minus Multimodal Distance": -8.51688289642334,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8710208425763994e-05,
      "MoBERT-F": 0.4888775552102522,
      "MoBERT-N": 0.5221332505969826,
      "MoBERT-min(F/N)": 0.4888775552102522,
      "MoBERT-max(F/N)": 0.5221332505969826,
      "MotionCritic": -8.760869979858398,
      "VeMo (human-opt view)": 0.5765407554671969,
      "VeMo (max entropy view)": 0.5765407554671969,
      "VeMo (min entropy view)": 0.6506410256410257,
      "VeMo (random view)": 0.5765407554671969,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person kicks their right leg up twice while hopping from one foot to the other, then punches with their right arm."
  },
  "000352": {
    "text": "person looks like theyre holding a microphone and talking into it",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8670618750420487,
      "Minus Multimodal Distance": -8.153679847717285,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.934232154279016e-05,
      "MoBERT-F": 0.405708758722475,
      "MoBERT-N": 0.3570438386447382,
      "MoBERT-min(F/N)": 0.3570438386447382,
      "MoBERT-max(F/N)": 0.405708758722475,
      "MotionCritic": -6.372888565063477,
      "VeMo (human-opt view)": 0.0029774991519780896,
      "VeMo (max entropy view)": 0.0029774991519780896,
      "VeMo (min entropy view)": 1.4286687557626618e-05,
      "VeMo (random view)": 1.4286687557626618e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person looks like they're holding a microphone and talking into it."
  },
  "000374": {
    "text": "a person walks forward one foot in front of another",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9274966467236432,
      "Minus Multimodal Distance": -4.974479675292969,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2484315195470117e-05,
      "MoBERT-F": 0.3794986367994665,
      "MoBERT-N": 0.5365366109714472,
      "MoBERT-min(F/N)": 0.3794986367994665,
      "MoBERT-max(F/N)": 0.5365366109714472,
      "MotionCritic": -1.5659897327423096,
      "VeMo (human-opt view)": 0.9602649006622517,
      "VeMo (max entropy view)": 0.9326633165829146,
      "VeMo (min entropy view)": 0.9602649006622517,
      "VeMo (random view)": 0.9326633165829146,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward with one foot in front of the other."
  },
  "000389": {
    "text": "a standing man loses a little bit of balance and his upper body leans and shakes toward his left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.941624059374782,
      "Minus Multimodal Distance": -3.499847173690796,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3875692932051606e-05,
      "MoBERT-F": 0.41217277599941626,
      "MoBERT-N": 0.4929360015287117,
      "MoBERT-min(F/N)": 0.41217277599941626,
      "MoBERT-max(F/N)": 0.4929360015287117,
      "MotionCritic": -5.919729709625244,
      "VeMo (human-opt view)": 0.4538878842676311,
      "VeMo (max entropy view)": 0.4538878842676311,
      "VeMo (min entropy view)": 0.6072106261859582,
      "VeMo (random view)": 0.6072106261859582,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A standing man loses a little bit of balance, and his upper body leans and shakes to his left."
  },
  "000421": {
    "text": "a person slowly walks down some stairs.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0751358288239896,
      "Minus Multimodal Distance": -5.636614799499512,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2457186787505634e-05,
      "MoBERT-F": 0.4437599094971971,
      "MoBERT-N": 0.6384154223658753,
      "MoBERT-min(F/N)": 0.4437599094971971,
      "MoBERT-max(F/N)": 0.6384154223658753,
      "MotionCritic": -9.594125747680664,
      "VeMo (human-opt view)": 0.019188667095943336,
      "VeMo (max entropy view)": 0.03522857611010978,
      "VeMo (min entropy view)": 0.019188667095943336,
      "VeMo (random view)": 0.03522857611010978,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person slowly walks down some stairs."
  },
  "000429": {
    "text": "a person walks forward, and repeatedly reaches down then shakes something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6408017606634724,
      "Minus Multimodal Distance": -10.810882568359375,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.362929080845788e-05,
      "MoBERT-F": 0.39099450983072054,
      "MoBERT-N": 0.5056255297486703,
      "MoBERT-min(F/N)": 0.39099450983072054,
      "MoBERT-max(F/N)": 0.5056255297486703,
      "MotionCritic": -7.228431224822998,
      "VeMo (human-opt view)": 0.001416235726367072,
      "VeMo (max entropy view)": 0.002976081125766984,
      "VeMo (min entropy view)": 0.001416235726367072,
      "VeMo (random view)": 0.002976081125766984,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and repeatedly reaches down and then shakes something."
  },
  "000439": {
    "text": "person is doing \"i'm the little teapot\" dance",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6160385789951104,
      "Minus Multimodal Distance": -8.438777923583984,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0003678211069200188,
      "MoBERT-F": 0.5677791109090131,
      "MoBERT-N": 0.616251649144955,
      "MoBERT-min(F/N)": 0.5677791109090131,
      "MoBERT-max(F/N)": 0.616251649144955,
      "MotionCritic": -7.362137317657471,
      "VeMo (human-opt view)": 0.5615942028985508,
      "VeMo (max entropy view)": 0.5615942028985508,
      "VeMo (min entropy view)": 0.3624454148471616,
      "VeMo (random view)": 0.3624454148471616,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is doing the \"I'm a Little Teapot\" dance."
  },
  "000490": {
    "text": "a person punches the air with their arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7540432635416006,
      "Minus Multimodal Distance": -6.855618000030518,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2600810552830808e-05,
      "MoBERT-F": 0.34781186528207614,
      "MoBERT-N": 0.48063234953188677,
      "MoBERT-min(F/N)": 0.34781186528207614,
      "MoBERT-max(F/N)": 0.48063234953188677,
      "MotionCritic": -6.605117321014404,
      "VeMo (human-opt view)": 0.6232741617357002,
      "VeMo (max entropy view)": 0.5316804407713499,
      "VeMo (min entropy view)": 0.6232741617357002,
      "VeMo (random view)": 0.6232741617357002,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person punches the air with their arms."
  },
  "000507": {
    "text": "a person while sitting on the floor throws with his right arm and then stands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7930110827327648,
      "Minus Multimodal Distance": -4.196874618530273,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.755176683422178e-05,
      "MoBERT-F": 0.3258452697455673,
      "MoBERT-N": 0.3381292050630356,
      "MoBERT-min(F/N)": 0.3258452697455673,
      "MoBERT-max(F/N)": 0.3381292050630356,
      "MotionCritic": -15.99425983428955,
      "VeMo (human-opt view)": 0.808,
      "VeMo (max entropy view)": 0.808,
      "VeMo (min entropy view)": 0.9362928797924474,
      "VeMo (random view)": 0.808,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person, while sitting on the floor, throws with his right arm and then stands up."
  },
  "000523": {
    "text": "a person repeatedly lunges forward and lifts their right armin front of their face.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7068432497992079,
      "Minus Multimodal Distance": -8.413294792175293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.774854798801243e-05,
      "MoBERT-F": 0.34032057996668075,
      "MoBERT-N": 0.4092109361093971,
      "MoBERT-min(F/N)": 0.34032057996668075,
      "MoBERT-max(F/N)": 0.4092109361093971,
      "MotionCritic": -12.032875061035156,
      "VeMo (human-opt view)": 0.37745098039215685,
      "VeMo (max entropy view)": 0.5626204238921002,
      "VeMo (min entropy view)": 0.37745098039215685,
      "VeMo (random view)": 0.5626204238921002,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person repeatedly lunges forward and lifts their right arm in front of their face."
  },
  "000565": {
    "text": "an off balance intoxicated man gestures at another person to the left. seemingly in an argument.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6064695248849363,
      "Minus Multimodal Distance": -11.482000350952148,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001834110007621348,
      "MoBERT-F": 0.517081619241482,
      "MoBERT-N": 0.4981520549364884,
      "MoBERT-min(F/N)": 0.4981520549364884,
      "MoBERT-max(F/N)": 0.517081619241482,
      "MotionCritic": -11.182611465454102,
      "VeMo (human-opt view)": 0.0012492401012752937,
      "VeMo (max entropy view)": 0.53125,
      "VeMo (min entropy view)": 0.0012492401012752937,
      "VeMo (random view)": 0.53125,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "An off - balance, intoxicated man gestures at another person to his left, seemingly in an argument."
  },
  "000571": {
    "text": "a person loses his balance towards his right and then steps back towards his left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7404964174533617,
      "Minus Multimodal Distance": -2.624166488647461,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.059989103348926e-05,
      "MoBERT-F": 0.42184033985246183,
      "MoBERT-N": 0.5805500733164419,
      "MoBERT-min(F/N)": 0.42184033985246183,
      "MoBERT-max(F/N)": 0.5805500733164419,
      "MotionCritic": -2.0682621002197266,
      "VeMo (human-opt view)": 0.9284201609487506,
      "VeMo (max entropy view)": 0.9284201609487506,
      "VeMo (min entropy view)": 0.9363867684478372,
      "VeMo (random view)": 0.9363867684478372,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person loses their balance to the right and then steps back to the left."
  },
  "000576": {
    "text": "a person walking around bouncing a ball.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.075884735860457,
      "Minus Multimodal Distance": -5.33584451675415,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8986510038375854,
      "MoBERT-F": 0.6579455674183727,
      "MoBERT-N": 0.45506810407521,
      "MoBERT-min(F/N)": 0.45506810407521,
      "MoBERT-max(F/N)": 0.6579455674183727,
      "MotionCritic": -4.502522945404053,
      "VeMo (human-opt view)": 0.00017398485033407688,
      "VeMo (max entropy view)": 0.0005887408816001159,
      "VeMo (min entropy view)": 0.00017398485033407688,
      "VeMo (random view)": 0.00017398485033407688,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking around, bouncing a ball."
  },
  "000580": {
    "text": "looks like he walks through a door to turn to the right and go straight forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.542353956542888,
      "Minus Multimodal Distance": -2.969869613647461,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.0228811738197692e-05,
      "MoBERT-F": 0.5398351287383608,
      "MoBERT-N": 0.5925043496108191,
      "MoBERT-min(F/N)": 0.5398351287383608,
      "MoBERT-max(F/N)": 0.5925043496108191,
      "MotionCritic": -12.557180404663086,
      "VeMo (human-opt view)": 0.6792452830188679,
      "VeMo (max entropy view)": 0.6792452830188679,
      "VeMo (min entropy view)": 0.6933333333333334,
      "VeMo (random view)": 0.6933333333333334,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "It looks like he walks through a door, turns to the right, and goes straight forward."
  },
  "000597": {
    "text": "the person is widewalking.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2272462132650337,
      "Minus Multimodal Distance": -9.045595169067383,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00860772654414177,
      "MoBERT-F": 0.7640082907711792,
      "MoBERT-N": 0.8105430174190921,
      "MoBERT-min(F/N)": 0.7640082907711792,
      "MoBERT-max(F/N)": 0.8105430174190921,
      "MotionCritic": -11.042762756347656,
      "VeMo (human-opt view)": 0.7985480943738656,
      "VeMo (max entropy view)": 0.7773972602739726,
      "VeMo (min entropy view)": 0.7985480943738656,
      "VeMo (random view)": 0.7773972602739726,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking widely."
  },
  "000601": {
    "text": "person is sitting down scratching head i think.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2566092163702571,
      "Minus Multimodal Distance": -5.2378058433532715,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.686158354161307e-05,
      "MoBERT-F": 0.4378340309977165,
      "MoBERT-N": 0.49143571969048616,
      "MoBERT-min(F/N)": 0.4378340309977165,
      "MoBERT-max(F/N)": 0.49143571969048616,
      "MotionCritic": -4.519211292266846,
      "VeMo (human-opt view)": 0.00026011138468335894,
      "VeMo (max entropy view)": 0.005532666274278988,
      "VeMo (min entropy view)": 0.00026011138468335894,
      "VeMo (random view)": 0.005532666274278988,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting down, scratching their head."
  },
  "000609": {
    "text": "a person slowly walks in a circle pattern as though they are ballroom dancing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.296926358682232,
      "Minus Multimodal Distance": -6.353126049041748,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3691251044510864e-05,
      "MoBERT-F": 0.42418255685791784,
      "MoBERT-N": 0.48926331610057305,
      "MoBERT-min(F/N)": 0.42418255685791784,
      "MoBERT-max(F/N)": 0.48926331610057305,
      "MotionCritic": -7.494080543518066,
      "VeMo (human-opt view)": 0.0003790534049900044,
      "VeMo (max entropy view)": 0.0003790534049900044,
      "VeMo (min entropy view)": 0.0003572588980253727,
      "VeMo (random view)": 0.0003790534049900044,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person slowly walks in a circular pattern as though they are ballroom dancing."
  },
  "000612": {
    "text": "the person is around and i think holding someone dancing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4654540050565996,
      "Minus Multimodal Distance": -8.713778495788574,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.001016907044686377,
      "MoBERT-F": 0.4909277744579339,
      "MoBERT-N": 0.4361133600706687,
      "MoBERT-min(F/N)": 0.4361133600706687,
      "MoBERT-max(F/N)": 0.4909277744579339,
      "MotionCritic": -7.044320583343506,
      "VeMo (human-opt view)": 0.0007571739192050889,
      "VeMo (max entropy view)": 0.001324088443413016,
      "VeMo (min entropy view)": 0.0007571739192050889,
      "VeMo (random view)": 0.001324088443413016,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is around, and I think they're holding someone and dancing."
  },
  "000633": {
    "text": "a person stands while moving their right arm as if eating something with a spoon or fork three times.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5984419701227468,
      "Minus Multimodal Distance": -4.188227653503418,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.026469494216144e-05,
      "MoBERT-F": 0.3875772157052743,
      "MoBERT-N": 0.38253177622784507,
      "MoBERT-min(F/N)": 0.38253177622784507,
      "MoBERT-max(F/N)": 0.3875772157052743,
      "MotionCritic": -4.8716230392456055,
      "VeMo (human-opt view)": 0.453781512605042,
      "VeMo (max entropy view)": 0.453781512605042,
      "VeMo (min entropy view)": 0.717434869739479,
      "VeMo (random view)": 0.453781512605042,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands while moving their right arm as if eating something with a spoon or fork three times."
  },
  "000665": {
    "text": "a person walks forward, with a quickening step and stops tentatively, as they take their last step.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9675392647373718,
      "Minus Multimodal Distance": -8.205251693725586,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5472183551755734e-05,
      "MoBERT-F": 0.4705935622147668,
      "MoBERT-N": 0.6129463077460203,
      "MoBERT-min(F/N)": 0.4705935622147668,
      "MoBERT-max(F/N)": 0.6129463077460203,
      "MotionCritic": -6.597362041473389,
      "VeMo (human-opt view)": 0.9688179840464104,
      "VeMo (max entropy view)": 0.9526047786917352,
      "VeMo (min entropy view)": 0.9688179840464104,
      "VeMo (random view)": 0.9688179840464104,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward with a quickening step and stops tentatively as they take their last step."
  },
  "000679": {
    "text": "drunk walking animation turning around",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6379768152887688,
      "Minus Multimodal Distance": -9.44787883758545,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.4648823202587664e-05,
      "MoBERT-F": 0.3572712412156715,
      "MoBERT-N": 0.4861614957868029,
      "MoBERT-min(F/N)": 0.3572712412156715,
      "MoBERT-max(F/N)": 0.4861614957868029,
      "MotionCritic": -12.163399696350098,
      "VeMo (human-opt view)": 0.10086455331412104,
      "VeMo (max entropy view)": 0.2336065573770492,
      "VeMo (min entropy view)": 0.10086455331412104,
      "VeMo (random view)": 0.10086455331412104,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Drunk Walking Animation: Turning Around"
  },
  "000687": {
    "text": "a person takes in big steps in a hurry walking into the rectangular area while hands are dangling and swinging.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4796569756488336,
      "Minus Multimodal Distance": -11.769512176513672,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.6707569708814844e-05,
      "MoBERT-F": 0.48794809042905657,
      "MoBERT-N": 0.4738310408817446,
      "MoBERT-min(F/N)": 0.4738310408817446,
      "MoBERT-max(F/N)": 0.48794809042905657,
      "MotionCritic": -11.371696472167969,
      "VeMo (human-opt view)": 0.0599835661462613,
      "VeMo (max entropy view)": 0.0848,
      "VeMo (min entropy view)": 0.0599835661462613,
      "VeMo (random view)": 0.0599835661462613,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person takes big steps in a hurry, walking into the rectangular area while their hands are dangling and swinging."
  },
  "000700": {
    "text": "a person jogs in a straight line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.445951463112395,
      "Minus Multimodal Distance": -3.5151867866516113,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2311027350951917e-05,
      "MoBERT-F": 0.41358720766510104,
      "MoBERT-N": 0.6017403342904684,
      "MoBERT-min(F/N)": 0.41358720766510104,
      "MoBERT-max(F/N)": 0.6017403342904684,
      "MotionCritic": -7.087034702301025,
      "VeMo (human-opt view)": 0.9101194217473287,
      "VeMo (max entropy view)": 0.5776892430278885,
      "VeMo (min entropy view)": 0.9101194217473287,
      "VeMo (random view)": 0.9101194217473287,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jogs in a straight line."
  },
  "000704": {
    "text": "walking in a straight line",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1705646190827077,
      "Minus Multimodal Distance": -9.223421096801758,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.18533940613269806,
      "MoBERT-F": 0.8118159983602,
      "MoBERT-N": 0.7849107628548864,
      "MoBERT-min(F/N)": 0.7849107628548864,
      "MoBERT-max(F/N)": 0.8118159983602,
      "MotionCritic": -8.255626678466797,
      "VeMo (human-opt view)": 0.9830085437266007,
      "VeMo (max entropy view)": 0.9830085437266007,
      "VeMo (min entropy view)": 0.9890404478435701,
      "VeMo (random view)": 0.9890404478435701,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Walking in a straight line"
  },
  "000708": {
    "text": "she jumps up and down, kicking her heels in the air.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.332590402301364,
      "Minus Multimodal Distance": -7.570670127868652,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.033420249819755554,
      "MoBERT-F": 0.6504699317894616,
      "MoBERT-N": 0.6739831913359112,
      "MoBERT-min(F/N)": 0.6504699317894616,
      "MoBERT-max(F/N)": 0.6739831913359112,
      "MotionCritic": -7.148521423339844,
      "VeMo (human-opt view)": 3.109974933602035e-05,
      "VeMo (max entropy view)": 3.109974933602035e-05,
      "VeMo (min entropy view)": 1.625366329375511e-05,
      "VeMo (random view)": 3.109974933602035e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "She jumps up and down, kicking her heels in the air."
  },
  "000710": {
    "text": "a person walks forward while twisting their torso side to side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9473872056832284,
      "Minus Multimodal Distance": -9.409082412719727,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.742262535728514e-05,
      "MoBERT-F": 0.5348254878725013,
      "MoBERT-N": 0.6971413336243587,
      "MoBERT-min(F/N)": 0.5348254878725013,
      "MoBERT-max(F/N)": 0.6971413336243587,
      "MotionCritic": -7.4798264503479,
      "VeMo (human-opt view)": 0.9152,
      "VeMo (max entropy view)": 0.9152,
      "VeMo (min entropy view)": 0.9648757016840417,
      "VeMo (random view)": 0.9152,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward while twisting their torso from side to side."
  },
  "000715": {
    "text": "person steps left forward pivots on leg and turns around walks back and faces other direction",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2182442559238655,
      "Minus Multimodal Distance": -6.951218605041504,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.705920815060381e-05,
      "MoBERT-F": 0.41943966958768647,
      "MoBERT-N": 0.5922693569222461,
      "MoBERT-min(F/N)": 0.41943966958768647,
      "MoBERT-max(F/N)": 0.5922693569222461,
      "MotionCritic": -13.235672950744629,
      "VeMo (human-opt view)": 0.9284253578732107,
      "VeMo (max entropy view)": 0.8515535097813579,
      "VeMo (min entropy view)": 0.9284253578732107,
      "VeMo (random view)": 0.9284253578732107,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps forward to the left, pivots on one leg, turns around, walks back, and faces the other direction."
  },
  "000742": {
    "text": "person stretching the whole right side by lifting the right arm and leaning sideways to the left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2535903153058494,
      "Minus Multimodal Distance": -6.110986709594727,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.093201303272508e-05,
      "MoBERT-F": 0.3159271581706761,
      "MoBERT-N": 0.44855822161566844,
      "MoBERT-min(F/N)": 0.3159271581706761,
      "MoBERT-max(F/N)": 0.44855822161566844,
      "MotionCritic": -3.030033588409424,
      "VeMo (human-opt view)": 0.8932676518883416,
      "VeMo (max entropy view)": 0.8932676518883416,
      "VeMo (min entropy view)": 0.9706574223102638,
      "VeMo (random view)": 0.8932676518883416,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is stretching the entire right side of their body by lifting their right arm and leaning sideways to the left."
  },
  "000749": {
    "text": "a man using both hands to lift something off ground and places it back on ground in a slightly different position",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.381305811982223,
      "Minus Multimodal Distance": -5.70957088470459,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.030486732313875e-05,
      "MoBERT-F": 0.41403984238032326,
      "MoBERT-N": 0.48702854308790416,
      "MoBERT-min(F/N)": 0.41403984238032326,
      "MoBERT-max(F/N)": 0.48702854308790416,
      "MotionCritic": -2.7394208908081055,
      "VeMo (human-opt view)": 0.0035908896868643747,
      "VeMo (max entropy view)": 0.0035908896868643747,
      "VeMo (min entropy view)": 0.0001441104415367467,
      "VeMo (random view)": 0.0001441104415367467,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man uses both hands to lift something off the ground and places it back on the ground in a slightly different position."
  },
  "000750": {
    "text": "a man postures his arms like holding a dance partner and dances the waltz from the left hand side to the right hand side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8473110137883216,
      "Minus Multimodal Distance": -12.255184173583984,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7493237212183885e-05,
      "MoBERT-F": 0.529073988520777,
      "MoBERT-N": 0.5952765388222766,
      "MoBERT-min(F/N)": 0.529073988520777,
      "MoBERT-max(F/N)": 0.5952765388222766,
      "MotionCritic": -5.100316524505615,
      "VeMo (human-opt view)": 0.7316017316017316,
      "VeMo (max entropy view)": 0.6228287841191067,
      "VeMo (min entropy view)": 0.7316017316017316,
      "VeMo (random view)": 0.6228287841191067,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man positions his arms as if holding a dance partner and waltzes from the left side to the right side."
  },
  "000759": {
    "text": "a person walks unbalanced as if they are on a tight rope.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0861532202882238,
      "Minus Multimodal Distance": -9.27365779876709,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.779654308455065e-05,
      "MoBERT-F": 0.654155085768584,
      "MoBERT-N": 0.6412164805927698,
      "MoBERT-min(F/N)": 0.6412164805927698,
      "MoBERT-max(F/N)": 0.654155085768584,
      "MotionCritic": -9.079469680786133,
      "VeMo (human-opt view)": 0.6642728904847397,
      "VeMo (max entropy view)": 0.6224256292906178,
      "VeMo (min entropy view)": 0.6642728904847397,
      "VeMo (random view)": 0.6224256292906178,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks unsteadily as if they are on a tightrope."
  },
  "000781": {
    "text": "a man raises left foot knee high then swings out and puts down repaets this motion twice",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9111237363356375,
      "Minus Multimodal Distance": -14.327839851379395,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2816202545072883e-05,
      "MoBERT-F": 0.43455750892707884,
      "MoBERT-N": 0.5401694743763475,
      "MoBERT-min(F/N)": 0.43455750892707884,
      "MoBERT-max(F/N)": 0.5401694743763475,
      "MotionCritic": -4.931278705596924,
      "VeMo (human-opt view)": 0.8872037914691943,
      "VeMo (max entropy view)": 0.8872037914691943,
      "VeMo (min entropy view)": 0.8935944944415034,
      "VeMo (random view)": 0.8872037914691943,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man raises his left foot knee - high, then swings it out and puts it down. He repeats this motion twice."
  },
  "000792": {
    "text": "a person imitates biting into something then waves their right hand around randomly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0864248462155426,
      "Minus Multimodal Distance": -6.261916637420654,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.713092883117497e-05,
      "MoBERT-F": 0.4511209396209954,
      "MoBERT-N": 0.4792504776106104,
      "MoBERT-min(F/N)": 0.4511209396209954,
      "MoBERT-max(F/N)": 0.4792504776106104,
      "MotionCritic": -5.716822624206543,
      "VeMo (human-opt view)": 0.3350383631713555,
      "VeMo (max entropy view)": 0.4212121212121212,
      "VeMo (min entropy view)": 0.3350383631713555,
      "VeMo (random view)": 0.4212121212121212,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person imitates biting into something and then waves their right hand around randomly."
  },
  "000813": {
    "text": "a standing person lifts their left hand and slowly touches their head",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6174415259706778,
      "Minus Multimodal Distance": -3.980307102203369,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.3633015593513846e-05,
      "MoBERT-F": 0.3184603640739596,
      "MoBERT-N": 0.40414603267304444,
      "MoBERT-min(F/N)": 0.3184603640739596,
      "MoBERT-max(F/N)": 0.40414603267304444,
      "MotionCritic": -9.583831787109375,
      "VeMo (human-opt view)": 0.7061611374407583,
      "VeMo (max entropy view)": 0.7061611374407583,
      "VeMo (min entropy view)": 0.7878787878787878,
      "VeMo (random view)": 0.7878787878787878,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A standing person lifts their left hand and slowly touches their head."
  },
  "000815": {
    "text": "the man walks back and forth putting something on a shelf.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.8447071487343838,
      "Minus Multimodal Distance": -4.487745761871338,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9762352705001831,
      "MoBERT-F": 0.6982386421545357,
      "MoBERT-N": 0.46087089851204543,
      "MoBERT-min(F/N)": 0.46087089851204543,
      "MoBERT-max(F/N)": 0.6982386421545357,
      "MotionCritic": -14.709503173828125,
      "VeMo (human-opt view)": 0.00012002984032992506,
      "VeMo (max entropy view)": 0.00012002984032992506,
      "VeMo (min entropy view)": 5.1408712687847566e-05,
      "VeMo (random view)": 5.1408712687847566e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man walks back and forth, putting something on a shelf."
  },
  "000820": {
    "text": "using his left hand he smacks his right arm bicep.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7528560550888626,
      "Minus Multimodal Distance": -3.7238686084747314,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.151070970692672e-05,
      "MoBERT-F": 0.2854758299452246,
      "MoBERT-N": 0.42699698172466766,
      "MoBERT-min(F/N)": 0.2854758299452246,
      "MoBERT-max(F/N)": 0.42699698172466766,
      "MotionCritic": -3.5458459854125977,
      "VeMo (human-opt view)": 0.8736616702355461,
      "VeMo (max entropy view)": 0.4835164835164835,
      "VeMo (min entropy view)": 0.8736616702355461,
      "VeMo (random view)": 0.8736616702355461,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Using his left hand, he smacks the bicep of his right arm."
  },
  "000824": {
    "text": "person is jogging and then gets down and walks like an ape and then gets back up and jogs again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9005705852699974,
      "Minus Multimodal Distance": -7.360279560089111,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.05195892974734306,
      "MoBERT-F": 0.47995357433805813,
      "MoBERT-N": 0.39527088476443883,
      "MoBERT-min(F/N)": 0.39527088476443883,
      "MoBERT-max(F/N)": 0.47995357433805813,
      "MotionCritic": -9.954133987426758,
      "VeMo (human-opt view)": 0.42245989304812837,
      "VeMo (max entropy view)": 0.42245989304812837,
      "VeMo (min entropy view)": 0.622478386167147,
      "VeMo (random view)": 0.622478386167147,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is jogging, then gets down and walks like an ape, then gets back up and jogs again."
  },
  "000825": {
    "text": "a person at a standstill starts running, then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.354312624794019,
      "Minus Multimodal Distance": -5.703484058380127,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.470872201840393e-05,
      "MoBERT-F": 0.39501538465097996,
      "MoBERT-N": 0.5884156994556957,
      "MoBERT-min(F/N)": 0.39501538465097996,
      "MoBERT-max(F/N)": 0.5884156994556957,
      "MotionCritic": -10.946782112121582,
      "VeMo (human-opt view)": 0.9919363284113519,
      "VeMo (max entropy view)": 0.9840841650930672,
      "VeMo (min entropy view)": 0.9919363284113519,
      "VeMo (random view)": 0.9919363284113519,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person at a standstill starts running and then stops."
  },
  "000827": {
    "text": "a man walks foward ,makes a u turn to the right side ,walks and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.077781930543697,
      "Minus Multimodal Distance": -9.549322128295898,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6654191970010288e-05,
      "MoBERT-F": 0.3720836101072098,
      "MoBERT-N": 0.45176768588218497,
      "MoBERT-min(F/N)": 0.3720836101072098,
      "MoBERT-max(F/N)": 0.45176768588218497,
      "MotionCritic": -19.895904541015625,
      "VeMo (human-opt view)": 0.8743400211193242,
      "VeMo (max entropy view)": 0.8743400211193242,
      "VeMo (min entropy view)": 0.9193934557063048,
      "VeMo (random view)": 0.8743400211193242,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man walks forward, makes a U - turn to the right side, walks, and then stops."
  },
  "000847": {
    "text": "the person is walking knees down line crawling.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8808591834174793,
      "Minus Multimodal Distance": -5.0040059089660645,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3769189283484593e-05,
      "MoBERT-F": 0.39103354905117427,
      "MoBERT-N": 0.4241440582590881,
      "MoBERT-min(F/N)": 0.39103354905117427,
      "MoBERT-max(F/N)": 0.4241440582590881,
      "MotionCritic": -7.486785411834717,
      "VeMo (human-opt view)": 0.8673469387755102,
      "VeMo (max entropy view)": 0.8673469387755102,
      "VeMo (min entropy view)": 0.8745980707395499,
      "VeMo (random view)": 0.8673469387755102,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking, knees down, crawling along a line."
  },
  "000865": {
    "text": "the person is preforming a swimming stroke know as the butterfly stroke.  the arms swing from behind the head and reenter the water propelling the person forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4957850636002974,
      "Minus Multimodal Distance": -3.9731132984161377,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.707034430699423e-05,
      "MoBERT-F": 0.49790341622523493,
      "MoBERT-N": 0.3907449498326621,
      "MoBERT-min(F/N)": 0.3907449498326621,
      "MoBERT-max(F/N)": 0.49790341622523493,
      "MotionCritic": -6.411703586578369,
      "VeMo (human-opt view)": 0.7987711213517665,
      "VeMo (max entropy view)": 0.7880386983289358,
      "VeMo (min entropy view)": 0.7987711213517665,
      "VeMo (random view)": 0.7987711213517665,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is performing a swimming stroke known as the butterfly stroke. The arms swing from behind the head and re - enter the water, propelling the person forward."
  },
  "000886": {
    "text": "a person sits down in a chair and then gets back up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0728407677246916,
      "Minus Multimodal Distance": -8.901256561279297,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7714622774510644e-05,
      "MoBERT-F": 0.33596270505325593,
      "MoBERT-N": 0.42372720460437024,
      "MoBERT-min(F/N)": 0.33596270505325593,
      "MoBERT-max(F/N)": 0.42372720460437024,
      "MotionCritic": -8.370379447937012,
      "VeMo (human-opt view)": 0.17307692307692307,
      "VeMo (max entropy view)": 0.19220549158547387,
      "VeMo (min entropy view)": 0.17307692307692307,
      "VeMo (random view)": 0.19220549158547387,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits down in a chair and then gets back up."
  },
  "000921": {
    "text": "person clasps both hands together then waves arms to side then goes down on right knee bent over ties something on feet then gets up",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3836396749431705,
      "Minus Multimodal Distance": -6.071809768676758,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.375553569640033e-05,
      "MoBERT-F": 0.4833500740789304,
      "MoBERT-N": 0.4783921315962732,
      "MoBERT-min(F/N)": 0.4783921315962732,
      "MoBERT-max(F/N)": 0.4833500740789304,
      "MotionCritic": -5.8057475090026855,
      "VeMo (human-opt view)": 0.7433155080213903,
      "VeMo (max entropy view)": 0.7433155080213903,
      "VeMo (min entropy view)": 0.8361749444032617,
      "VeMo (random view)": 0.8361749444032617,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person clasps both hands together, then waves their arms to the side. Next, the person goes down on their right knee, bends over, ties something on their foot, and then gets up."
  },
  "000954": {
    "text": "a person is swinging a tennis racket.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9003817838761796,
      "Minus Multimodal Distance": -7.954312801361084,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.461619290756062e-05,
      "MoBERT-F": 0.5769138576197721,
      "MoBERT-N": 0.5378372497154854,
      "MoBERT-min(F/N)": 0.5378372497154854,
      "MoBERT-max(F/N)": 0.5769138576197721,
      "MotionCritic": 2.588527202606201,
      "VeMo (human-opt view)": 8.400835763146494e-06,
      "VeMo (max entropy view)": 1.0794665097950224e-05,
      "VeMo (min entropy view)": 8.400835763146494e-06,
      "VeMo (random view)": 8.400835763146494e-06,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is swinging a tennis racket."
  },
  "000972": {
    "text": "this person does a short sprint forward, holding their arms up to their chest level.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0529502130887896,
      "Minus Multimodal Distance": -8.837870597839355,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.207756476011127e-05,
      "MoBERT-F": 0.3608559272163303,
      "MoBERT-N": 0.5514976487121195,
      "MoBERT-min(F/N)": 0.3608559272163303,
      "MoBERT-max(F/N)": 0.5514976487121195,
      "MotionCritic": -5.459972858428955,
      "VeMo (human-opt view)": 0.2129032258064516,
      "VeMo (max entropy view)": 0.407098121085595,
      "VeMo (min entropy view)": 0.2129032258064516,
      "VeMo (random view)": 0.407098121085595,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person does a short sprint forward, holding their arms up to chest level."
  },
  "000998": {
    "text": "while walking forward he appears to be pushed, he rights himself and continues walking.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5675666434416473,
      "Minus Multimodal Distance": -9.712313652038574,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9849263429641724,
      "MoBERT-F": 0.7280209141383653,
      "MoBERT-N": 0.6515836405708069,
      "MoBERT-min(F/N)": 0.6515836405708069,
      "MoBERT-max(F/N)": 0.7280209141383653,
      "MotionCritic": -12.638450622558594,
      "VeMo (human-opt view)": 0.7881773399014779,
      "VeMo (max entropy view)": 0.7881773399014779,
      "VeMo (min entropy view)": 0.8444902162718847,
      "VeMo (random view)": 0.8444902162718847,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "While walking forward, he appears to be pushed. He rights himself and continues walking."
  },
  "001003": {
    "text": "the person is walking forward using both their left and right hand for balance. their left and right hand are pressing down on something to help them walk. they put their right foot directly in front of the left and then the left directly in front of the right they stop walking and stand",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9543277579623541,
      "Minus Multimodal Distance": -6.064695358276367,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3906221031211317e-05,
      "MoBERT-F": 0.5052626773455451,
      "MoBERT-N": 0.5796530868277221,
      "MoBERT-min(F/N)": 0.5052626773455451,
      "MoBERT-max(F/N)": 0.5796530868277221,
      "MotionCritic": -12.202798843383789,
      "VeMo (human-opt view)": 0.8266953713670614,
      "VeMo (max entropy view)": 0.8172043010752689,
      "VeMo (min entropy view)": 0.8266953713670614,
      "VeMo (random view)": 0.8172043010752689,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking forward, using both their left and right hands for balance. Their left and right hands are pressing down on something to help them walk. They put their right foot directly in front of the left, and then the left directly in front of the right. They stop walking and stand."
  },
  "001005": {
    "text": "a person touches each elbow to the opposite knee then spreads his legs and starts to do squats.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1099492543407454,
      "Minus Multimodal Distance": -10.5007963180542,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.469453465892002e-05,
      "MoBERT-F": 0.3666444964623517,
      "MoBERT-N": 0.43740844084885966,
      "MoBERT-min(F/N)": 0.3666444964623517,
      "MoBERT-max(F/N)": 0.43740844084885966,
      "MotionCritic": -4.225587368011475,
      "VeMo (human-opt view)": 0.5310734463276836,
      "VeMo (max entropy view)": 0.5310734463276836,
      "VeMo (min entropy view)": 0.3771043771043771,
      "VeMo (random view)": 0.5310734463276836,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person touches each elbow to the opposite knee, then spreads their legs and starts to do squats."
  },
  "001008": {
    "text": "a person walks forward then turns completely around and does a cartwheel.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.139998129589816,
      "Minus Multimodal Distance": -7.046415328979492,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8673928682110272e-05,
      "MoBERT-F": 0.41909378879867487,
      "MoBERT-N": 0.5524842437303727,
      "MoBERT-min(F/N)": 0.41909378879867487,
      "MoBERT-max(F/N)": 0.5524842437303727,
      "MotionCritic": -13.7486572265625,
      "VeMo (human-opt view)": 0.9840999803703462,
      "VeMo (max entropy view)": 0.9552955295529553,
      "VeMo (min entropy view)": 0.9840999803703462,
      "VeMo (random view)": 0.9840999803703462,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, then turns completely around and does a cartwheel."
  },
  "001014": {
    "text": "person has arms crossing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6002006041124047,
      "Minus Multimodal Distance": -3.974668502807617,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6010284273070283e-05,
      "MoBERT-F": 0.3845629842341994,
      "MoBERT-N": 0.5035457977905139,
      "MoBERT-min(F/N)": 0.3845629842341994,
      "MoBERT-max(F/N)": 0.5035457977905139,
      "MotionCritic": -6.5215044021606445,
      "VeMo (human-opt view)": 0.9626321974148061,
      "VeMo (max entropy view)": 0.5925925925925926,
      "VeMo (min entropy view)": 0.9626321974148061,
      "VeMo (random view)": 0.9626321974148061,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person has their arms crossed."
  },
  "001029": {
    "text": "someone is walking forward and holding a handrail very carefully, as if they are afraid of falling.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.067392702639495,
      "Minus Multimodal Distance": -6.0988264083862305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.863353984139394e-05,
      "MoBERT-F": 0.3841742507109623,
      "MoBERT-N": 0.5075701062891704,
      "MoBERT-min(F/N)": 0.3841742507109623,
      "MoBERT-max(F/N)": 0.5075701062891704,
      "MotionCritic": -12.131538391113281,
      "VeMo (human-opt view)": 0.002182725288431556,
      "VeMo (max entropy view)": 0.002182725288431556,
      "VeMo (min entropy view)": 0.002047072881671352,
      "VeMo (random view)": 0.002182725288431556,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is walking forward and holding a handrail very carefully, as if they're afraid of falling."
  },
  "001038": {
    "text": "a person warming up by swinging back and arms around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2213994027676633,
      "Minus Multimodal Distance": -11.026213645935059,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.26891496777534485,
      "MoBERT-F": 0.6850412328849558,
      "MoBERT-N": 0.5367936013166044,
      "MoBERT-min(F/N)": 0.5367936013166044,
      "MoBERT-max(F/N)": 0.6850412328849558,
      "MotionCritic": -5.08382511138916,
      "VeMo (human-opt view)": 0.9146816803811174,
      "VeMo (max entropy view)": 0.9146816803811174,
      "VeMo (min entropy view)": 0.9724521309450278,
      "VeMo (random view)": 0.9146816803811174,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is warming up by swinging their arms back and around."
  },
  "001052": {
    "text": "a person who performed a left handed uppercut",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3446298348212782,
      "Minus Multimodal Distance": -7.530115127563477,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4257753466372378e-05,
      "MoBERT-F": 0.45306041267271174,
      "MoBERT-N": 0.5255691228395309,
      "MoBERT-min(F/N)": 0.45306041267271174,
      "MoBERT-max(F/N)": 0.5255691228395309,
      "MotionCritic": -6.32471227645874,
      "VeMo (human-opt view)": 0.28125,
      "VeMo (max entropy view)": 0.5311355311355311,
      "VeMo (min entropy view)": 0.28125,
      "VeMo (random view)": 0.28125,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who performed a left - handed uppercut."
  },
  "001059": {
    "text": "the person is holding their head while walking.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6311923614602184,
      "Minus Multimodal Distance": -10.502277374267578,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5211760657839477e-05,
      "MoBERT-F": 0.45763923830203224,
      "MoBERT-N": 0.4463498297317727,
      "MoBERT-min(F/N)": 0.4463498297317727,
      "MoBERT-max(F/N)": 0.45763923830203224,
      "MotionCritic": -13.556462287902832,
      "VeMo (human-opt view)": 0.20215633423180593,
      "VeMo (max entropy view)": 0.3482849604221636,
      "VeMo (min entropy view)": 0.20215633423180593,
      "VeMo (random view)": 0.3482849604221636,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is holding their head while walking."
  },
  "001113": {
    "text": "this person is cleaning a table.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9821278689975428,
      "Minus Multimodal Distance": -2.216768980026245,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.062785617657937e-05,
      "MoBERT-F": 0.4271798493450455,
      "MoBERT-N": 0.5429657325346182,
      "MoBERT-min(F/N)": 0.4271798493450455,
      "MoBERT-max(F/N)": 0.5429657325346182,
      "MotionCritic": -1.7645498514175415,
      "VeMo (human-opt view)": 0.0001539571026893875,
      "VeMo (max entropy view)": 0.00043139144207546515,
      "VeMo (min entropy view)": 0.0001539571026893875,
      "VeMo (random view)": 0.00043139144207546515,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person is cleaning a table."
  },
  "001152": {
    "text": "a person performs a typical broadjump.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9888691729575485,
      "Minus Multimodal Distance": -4.649226665496826,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5479773283004761,
      "MoBERT-F": 0.7335326855173863,
      "MoBERT-N": 0.6645134448964866,
      "MoBERT-min(F/N)": 0.6645134448964866,
      "MoBERT-max(F/N)": 0.7335326855173863,
      "MotionCritic": -8.240416526794434,
      "VeMo (human-opt view)": 0.9241706161137441,
      "VeMo (max entropy view)": 0.9100169779286927,
      "VeMo (min entropy view)": 0.9241706161137441,
      "VeMo (random view)": 0.9241706161137441,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person performs a typical broad jump."
  },
  "001161": {
    "text": "person runs quickly straight forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.8805133935252805,
      "Minus Multimodal Distance": -7.699490547180176,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.268121716042515e-05,
      "MoBERT-F": 0.4046616132127645,
      "MoBERT-N": 0.5816148465533809,
      "MoBERT-min(F/N)": 0.4046616132127645,
      "MoBERT-max(F/N)": 0.5816148465533809,
      "MotionCritic": -11.860095977783203,
      "VeMo (human-opt view)": 0.5310880829015544,
      "VeMo (max entropy view)": 0.5310880829015544,
      "VeMo (min entropy view)": 0.02597187758478081,
      "VeMo (random view)": 0.02597187758478081,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs quickly straight forward."
  },
  "001168": {
    "text": "a person walks forward casually with a swagger to their hips.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9709762008688743,
      "Minus Multimodal Distance": -8.67341136932373,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.041780099272727966,
      "MoBERT-F": 0.7932370797321542,
      "MoBERT-N": 0.6778638641495806,
      "MoBERT-min(F/N)": 0.6778638641495806,
      "MoBERT-max(F/N)": 0.7932370797321542,
      "MotionCritic": -15.336328506469727,
      "VeMo (human-opt view)": 0.8931245745405038,
      "VeMo (max entropy view)": 0.8598524762908325,
      "VeMo (min entropy view)": 0.8931245745405038,
      "VeMo (random view)": 0.8931245745405038,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward casually, swaying their hips with a swagger."
  },
  "001169": {
    "text": "a person doing a spesific moves with legs and hands while doing boxing",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.7375221202541193,
      "Minus Multimodal Distance": -5.88657283782959,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.04754575714468956,
      "MoBERT-F": 0.641238074432255,
      "MoBERT-N": 0.6068232420605294,
      "MoBERT-min(F/N)": 0.6068232420605294,
      "MoBERT-max(F/N)": 0.641238074432255,
      "MotionCritic": -4.171176910400391,
      "VeMo (human-opt view)": 0.9552238805970149,
      "VeMo (max entropy view)": 0.9100169779286927,
      "VeMo (min entropy view)": 0.9552238805970149,
      "VeMo (random view)": 0.9100169779286927,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person doing specific movements with legs and hands while doing boxing."
  },
  "001171": {
    "text": "the person is getting a shower and washing his right arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4232523016335343,
      "Minus Multimodal Distance": -9.630107879638672,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.500853876583278e-05,
      "MoBERT-F": 0.5292478591610923,
      "MoBERT-N": 0.5458442053902767,
      "MoBERT-min(F/N)": 0.5292478591610923,
      "MoBERT-max(F/N)": 0.5458442053902767,
      "MotionCritic": -6.171844482421875,
      "VeMo (human-opt view)": 0.00016408419747634452,
      "VeMo (max entropy view)": 0.0021766641356618576,
      "VeMo (min entropy view)": 0.00016408419747634452,
      "VeMo (random view)": 0.0021766641356618576,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is taking a shower and washing his right arm."
  },
  "001182": {
    "text": "a person lifts each knee towards the opposite elbow",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3430501476300558,
      "Minus Multimodal Distance": -10.73111343383789,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.3264375815633684e-05,
      "MoBERT-F": 0.3817899490964402,
      "MoBERT-N": 0.4296966332977083,
      "MoBERT-min(F/N)": 0.3817899490964402,
      "MoBERT-max(F/N)": 0.4296966332977083,
      "MotionCritic": -4.569948196411133,
      "VeMo (human-opt view)": 0.45348837209302323,
      "VeMo (max entropy view)": 0.45348837209302323,
      "VeMo (min entropy view)": 0.80859375,
      "VeMo (random view)": 0.45348837209302323,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lifts each knee towards the opposite elbow."
  },
  "001193": {
    "text": "a man jumps once and then wobbles a little while moving legs apart.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4486209354030728,
      "Minus Multimodal Distance": -9.148877143859863,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3265703618526459,
      "MoBERT-F": 0.8010678315501978,
      "MoBERT-N": 0.7841105791184282,
      "MoBERT-min(F/N)": 0.7841105791184282,
      "MoBERT-max(F/N)": 0.8010678315501978,
      "MotionCritic": -1.2973618507385254,
      "VeMo (human-opt view)": 0.6233333333333333,
      "VeMo (max entropy view)": 0.6233333333333333,
      "VeMo (min entropy view)": 0.3073322932917317,
      "VeMo (random view)": 0.6233333333333333,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man jumps once and then wobbles a little while moving his legs apart."
  },
  "001209": {
    "text": "the person was moving his arms around like he was drunk.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.352965772479667,
      "Minus Multimodal Distance": -4.537552356719971,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.003658339148387313,
      "MoBERT-F": 0.6078069868321145,
      "MoBERT-N": 0.5237961022141648,
      "MoBERT-min(F/N)": 0.5237961022141648,
      "MoBERT-max(F/N)": 0.6078069868321145,
      "MotionCritic": -5.868831634521484,
      "VeMo (human-opt view)": 0.24444444444444444,
      "VeMo (max entropy view)": 0.24444444444444444,
      "VeMo (min entropy view)": 0.027637100274924033,
      "VeMo (random view)": 0.24444444444444444,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person was moving his arms around as if he was drunk."
  },
  "001218": {
    "text": "a man walks in a curved line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6398953198280142,
      "Minus Multimodal Distance": -8.936442375183105,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.2521391403861344e-05,
      "MoBERT-F": 0.5832152628225044,
      "MoBERT-N": 0.6136940585651194,
      "MoBERT-min(F/N)": 0.5832152628225044,
      "MoBERT-max(F/N)": 0.6136940585651194,
      "MotionCritic": -8.297844886779785,
      "VeMo (human-opt view)": 0.808199121522694,
      "VeMo (max entropy view)": 0.808199121522694,
      "VeMo (min entropy view)": 0.8739205526770294,
      "VeMo (random view)": 0.8739205526770294,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks in a curved line."
  },
  "001225": {
    "text": "person walks five steps forward whilst holding right hand extended to the right hand side holding onto something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1192829047072348,
      "Minus Multimodal Distance": -3.2810258865356445,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.193972927168943e-05,
      "MoBERT-F": 0.3220533646341206,
      "MoBERT-N": 0.4387891339817688,
      "MoBERT-min(F/N)": 0.3220533646341206,
      "MoBERT-max(F/N)": 0.4387891339817688,
      "MotionCritic": -6.597081661224365,
      "VeMo (human-opt view)": 0.20245398773006135,
      "VeMo (max entropy view)": 0.3773987206823028,
      "VeMo (min entropy view)": 0.20245398773006135,
      "VeMo (random view)": 0.20245398773006135,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks five steps forward while holding their right hand extended to the right side, grasping something."
  },
  "001249": {
    "text": "a person steps forward and reaches down to grab or place something with their left hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7394690307756783,
      "Minus Multimodal Distance": -8.237895011901855,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00013562290405388921,
      "MoBERT-F": 0.5714309474359404,
      "MoBERT-N": 0.6144810356847334,
      "MoBERT-min(F/N)": 0.5714309474359404,
      "MoBERT-max(F/N)": 0.6144810356847334,
      "MotionCritic": -15.968389511108398,
      "VeMo (human-opt view)": 0.7191489361702128,
      "VeMo (max entropy view)": 0.6226415094339622,
      "VeMo (min entropy view)": 0.7191489361702128,
      "VeMo (random view)": 0.6226415094339622,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps forward and reaches down to grab or place something with their left hand."
  },
  "001250": {
    "text": "person runs on an exercise machine.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5031960938111746,
      "Minus Multimodal Distance": -3.6016557216644287,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.692973743658513e-05,
      "MoBERT-F": 0.31428329505333746,
      "MoBERT-N": 0.5473927082569243,
      "MoBERT-min(F/N)": 0.31428329505333746,
      "MoBERT-max(F/N)": 0.5473927082569243,
      "MotionCritic": -9.925339698791504,
      "VeMo (human-opt view)": 3.6114421556326137e-06,
      "VeMo (max entropy view)": 3.6114421556326137e-06,
      "VeMo (min entropy view)": 3.2801429403682404e-06,
      "VeMo (random view)": 3.6114421556326137e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs on an exercise machine."
  },
  "001278": {
    "text": "a person raises their left arm to their face pensively while shifting their weight from side to side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6476958889903482,
      "Minus Multimodal Distance": -6.8991193771362305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5716115487739444e-05,
      "MoBERT-F": 0.39010737367617465,
      "MoBERT-N": 0.5104158898407037,
      "MoBERT-min(F/N)": 0.39010737367617465,
      "MoBERT-max(F/N)": 0.5104158898407037,
      "MotionCritic": -14.645101547241211,
      "VeMo (human-opt view)": 0.9399545602077247,
      "VeMo (max entropy view)": 0.9399545602077247,
      "VeMo (min entropy view)": 0.9496619083395943,
      "VeMo (random view)": 0.9496619083395943,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person raises their left arm to their face pensively while shifting their weight from side to side."
  },
  "001285": {
    "text": "someone doing the chicken dance",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7822239791887553,
      "Minus Multimodal Distance": -9.277594566345215,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.79367250186624e-05,
      "MoBERT-F": 0.3065762771465179,
      "MoBERT-N": 0.4676143426102212,
      "MoBERT-min(F/N)": 0.3065762771465179,
      "MoBERT-max(F/N)": 0.4676143426102212,
      "MotionCritic": -1.3790684938430786,
      "VeMo (human-opt view)": 1.716584302408749e-05,
      "VeMo (max entropy view)": 4.120675137634437e-05,
      "VeMo (min entropy view)": 1.716584302408749e-05,
      "VeMo (random view)": 1.716584302408749e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone is doing the chicken dance."
  },
  "001298": {
    "text": "someone stands with their hands on their upper legs spread apart, then straightens up and outstretches both arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5737679010760579,
      "Minus Multimodal Distance": -5.025108814239502,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4442459107376635e-05,
      "MoBERT-F": 0.44594515106696336,
      "MoBERT-N": 0.48760820969200025,
      "MoBERT-min(F/N)": 0.44594515106696336,
      "MoBERT-max(F/N)": 0.48760820969200025,
      "MotionCritic": -13.513026237487793,
      "VeMo (human-opt view)": 0.9963961366584979,
      "VeMo (max entropy view)": 0.9577464788732394,
      "VeMo (min entropy view)": 0.9963961366584979,
      "VeMo (random view)": 0.9577464788732394,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone stands with their hands on their upper legs, legs spread apart, then straightens up and outstretches both arms."
  },
  "001330": {
    "text": "a person uses right arm to hit down to the left and then the left arm crosses over the right side of the body.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9515192827805422,
      "Minus Multimodal Distance": -9.95789623260498,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7058538762503304e-05,
      "MoBERT-F": 0.31197903649588454,
      "MoBERT-N": 0.4137778086872094,
      "MoBERT-min(F/N)": 0.31197903649588454,
      "MoBERT-max(F/N)": 0.4137778086872094,
      "MotionCritic": -7.91910457611084,
      "VeMo (human-opt view)": 0.0007087525598789066,
      "VeMo (max entropy view)": 0.006260956674179815,
      "VeMo (min entropy view)": 0.0007087525598789066,
      "VeMo (random view)": 0.006260956674179815,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person uses the right arm to hit down to the left, and then the left arm crosses over the right side of the body."
  },
  "001349": {
    "text": "a man opens something and rubs it under his arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7276217910668894,
      "Minus Multimodal Distance": -10.11042594909668,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.8613165088463575e-05,
      "MoBERT-F": 0.4582895635605916,
      "MoBERT-N": 0.5721360362058683,
      "MoBERT-min(F/N)": 0.4582895635605916,
      "MoBERT-max(F/N)": 0.5721360362058683,
      "MotionCritic": -4.51440954208374,
      "VeMo (human-opt view)": 0.2682926829268293,
      "VeMo (max entropy view)": 0.2682926829268293,
      "VeMo (min entropy view)": 0.04198473282442748,
      "VeMo (random view)": 0.04198473282442748,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man opens something and rubs it under his arms."
  },
  "001359": {
    "text": "a person puts their left hand up by their head",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6452609069021262,
      "Minus Multimodal Distance": -7.855982303619385,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3788259568391368e-05,
      "MoBERT-F": 0.4108599719557763,
      "MoBERT-N": 0.5268774395502624,
      "MoBERT-min(F/N)": 0.4108599719557763,
      "MoBERT-max(F/N)": 0.5268774395502624,
      "MotionCritic": -4.490853786468506,
      "VeMo (human-opt view)": 0.9601820250284414,
      "VeMo (max entropy view)": 0.9601820250284414,
      "VeMo (min entropy view)": 0.9754931261207412,
      "VeMo (random view)": 0.9754931261207412,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person puts their left hand up near their head."
  },
  "001369": {
    "text": "a person walks forward, raises his right arm in front of him, then lowers his arm and walks backwards.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9551638359902334,
      "Minus Multimodal Distance": -11.650157928466797,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.85082203138154e-05,
      "MoBERT-F": 0.3052626173891624,
      "MoBERT-N": 0.5093674316535571,
      "MoBERT-min(F/N)": 0.3052626173891624,
      "MoBERT-max(F/N)": 0.5093674316535571,
      "MotionCritic": -4.508209228515625,
      "VeMo (human-opt view)": 0.4688995215311005,
      "VeMo (max entropy view)": 0.4688995215311005,
      "VeMo (min entropy view)": 0.7312295973884657,
      "VeMo (random view)": 0.7312295973884657,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, raises his right arm in front of himself, then lowers his arm and walks backwards."
  },
  "001380": {
    "text": "a person bent slightly over and picked something up with right hand and turned to right and  shaking item, looks as if is cooking and adding items to a pot of water.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9321326628662454,
      "Minus Multimodal Distance": -6.498244285583496,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.216591358068399e-05,
      "MoBERT-F": 0.4347214227702654,
      "MoBERT-N": 0.5108107257183656,
      "MoBERT-min(F/N)": 0.4347214227702654,
      "MoBERT-max(F/N)": 0.5108107257183656,
      "MotionCritic": -3.3937835693359375,
      "VeMo (human-opt view)": 0.006314046164993272,
      "VeMo (max entropy view)": 0.006314046164993272,
      "VeMo (min entropy view)": 0.0008062742798504728,
      "VeMo (random view)": 0.0008062742798504728,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bent slightly over, picked something up with their right hand, turned to the right, and shook the item. It looks as if they are cooking and adding items to a pot of water."
  },
  "001384": {
    "text": "a person lifts his left hand and waves his hand to say hello then puts left hand back down",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6358262031460231,
      "Minus Multimodal Distance": -12.155780792236328,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4267355911433697e-05,
      "MoBERT-F": 0.36371811228727585,
      "MoBERT-N": 0.42940865376749765,
      "MoBERT-min(F/N)": 0.36371811228727585,
      "MoBERT-max(F/N)": 0.42940865376749765,
      "MotionCritic": -9.644449234008789,
      "VeMo (human-opt view)": 0.7661870503597122,
      "VeMo (max entropy view)": 0.7317073170731707,
      "VeMo (min entropy view)": 0.7661870503597122,
      "VeMo (random view)": 0.7317073170731707,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts his left hand, waves it to say hello, then puts his left hand back down."
  },
  "001406": {
    "text": "a person walk forward cautiously with their hand against a surface.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9583769406232611,
      "Minus Multimodal Distance": -10.917941093444824,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.509309706510976e-05,
      "MoBERT-F": 0.3859316250212591,
      "MoBERT-N": 0.5046293275094778,
      "MoBERT-min(F/N)": 0.3859316250212591,
      "MoBERT-max(F/N)": 0.5046293275094778,
      "MotionCritic": -6.959197521209717,
      "VeMo (human-opt view)": 0.4681933842239186,
      "VeMo (max entropy view)": 0.4681933842239186,
      "VeMo (min entropy view)": 0.12587859424920128,
      "VeMo (random view)": 0.4681933842239186,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward cautiously with their hand against a surface."
  },
  "001429": {
    "text": "a man bends his right arm at the elbow in a struggling motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7423801178193837,
      "Minus Multimodal Distance": -9.91767406463623,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3101045371731743e-05,
      "MoBERT-F": 0.3270604932407646,
      "MoBERT-N": 0.466733788391413,
      "MoBERT-min(F/N)": 0.3270604932407646,
      "MoBERT-max(F/N)": 0.466733788391413,
      "MotionCritic": -5.572312355041504,
      "VeMo (human-opt view)": 0.6513761467889908,
      "VeMo (max entropy view)": 0.6513761467889908,
      "VeMo (min entropy view)": 0.7979539641943734,
      "VeMo (random view)": 0.6513761467889908,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man bends his right arm at the elbow in a struggling motion."
  },
  "001473": {
    "text": "a person sits in a  chair then stands back up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0881379120059715,
      "Minus Multimodal Distance": -10.105890274047852,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.9970582040259615e-05,
      "MoBERT-F": 0.3537428240098694,
      "MoBERT-N": 0.36262705218003427,
      "MoBERT-min(F/N)": 0.3537428240098694,
      "MoBERT-max(F/N)": 0.36262705218003427,
      "MotionCritic": -6.5883259773254395,
      "VeMo (human-opt view)": 0.46835443037974683,
      "VeMo (max entropy view)": 0.46835443037974683,
      "VeMo (min entropy view)": 0.2943201376936317,
      "VeMo (random view)": 0.46835443037974683,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits in a chair and then stands back up."
  },
  "001483": {
    "text": "someone rubs their belly with their left hand and rubs their head with their right hand at the same time.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2449082449212563,
      "Minus Multimodal Distance": -11.687103271484375,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.935346830985509e-05,
      "MoBERT-F": 0.38491155156083645,
      "MoBERT-N": 0.39098498080024824,
      "MoBERT-min(F/N)": 0.38491155156083645,
      "MoBERT-max(F/N)": 0.39098498080024824,
      "MotionCritic": -11.817152976989746,
      "VeMo (human-opt view)": 0.29390018484288355,
      "VeMo (max entropy view)": 0.29390018484288355,
      "VeMo (min entropy view)": 0.23333333333333334,
      "VeMo (random view)": 0.23333333333333334,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone rubs their belly with their left hand and their head with their right hand at the same time."
  },
  "001486": {
    "text": "the person is picking up something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9747021791588126,
      "Minus Multimodal Distance": -9.550371170043945,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.221349708037451e-05,
      "MoBERT-F": 0.47896124069385204,
      "MoBERT-N": 0.4815294651742776,
      "MoBERT-min(F/N)": 0.47896124069385204,
      "MoBERT-max(F/N)": 0.4815294651742776,
      "MotionCritic": -6.705172061920166,
      "VeMo (human-opt view)": 0.8356435643564356,
      "VeMo (max entropy view)": 0.8356435643564356,
      "VeMo (min entropy view)": 0.8740894901144641,
      "VeMo (random view)": 0.8356435643564356,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is picking up something."
  },
  "001520": {
    "text": "someone kick twice doing karate",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.931555563378279,
      "Minus Multimodal Distance": -12.755766868591309,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6775178412208334e-05,
      "MoBERT-F": 0.49658939073054675,
      "MoBERT-N": 0.5149357742468131,
      "MoBERT-min(F/N)": 0.49658939073054675,
      "MoBERT-max(F/N)": 0.5149357742468131,
      "MotionCritic": -11.584941864013672,
      "VeMo (human-opt view)": 0.960200041675349,
      "VeMo (max entropy view)": 0.9433633459192564,
      "VeMo (min entropy view)": 0.960200041675349,
      "VeMo (random view)": 0.9433633459192564,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone kicks twice while doing karate."
  },
  "001523": {
    "text": "a person feeling the back of their head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8425735864471563,
      "Minus Multimodal Distance": -11.20785140991211,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5642866603448056e-05,
      "MoBERT-F": 0.42143155210341293,
      "MoBERT-N": 0.5036061519485333,
      "MoBERT-min(F/N)": 0.42143155210341293,
      "MoBERT-max(F/N)": 0.5036061519485333,
      "MotionCritic": -10.952705383300781,
      "VeMo (human-opt view)": 0.6797385620915033,
      "VeMo (max entropy view)": 0.6797385620915033,
      "VeMo (min entropy view)": 0.9527378721632314,
      "VeMo (random view)": 0.9527378721632314,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is feeling the back of their head."
  },
  "001527": {
    "text": "a man walks forward in a snake like pattern.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.834758020063487,
      "Minus Multimodal Distance": -10.887277603149414,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.6371785998344421,
      "MoBERT-F": 0.7080082497461746,
      "MoBERT-N": 0.6304735500831794,
      "MoBERT-min(F/N)": 0.6304735500831794,
      "MoBERT-max(F/N)": 0.7080082497461746,
      "MotionCritic": -10.89918327331543,
      "VeMo (human-opt view)": 0.7310126582278481,
      "VeMo (max entropy view)": 0.7310126582278481,
      "VeMo (min entropy view)": 0.7976878612716763,
      "VeMo (random view)": 0.7310126582278481,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward in a snake - like pattern."
  },
  "001534": {
    "text": "a person is stretching its shoulders",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9489838577912445,
      "Minus Multimodal Distance": -6.735029697418213,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.505022712284699e-05,
      "MoBERT-F": 0.4438795461449566,
      "MoBERT-N": 0.4933000676736305,
      "MoBERT-min(F/N)": 0.4438795461449566,
      "MoBERT-max(F/N)": 0.4933000676736305,
      "MotionCritic": -4.064367771148682,
      "VeMo (human-opt view)": 0.9756429909725771,
      "VeMo (max entropy view)": 0.9626070607896386,
      "VeMo (min entropy view)": 0.9756429909725771,
      "VeMo (random view)": 0.9756429909725771,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is stretching their shoulders."
  },
  "001538": {
    "text": "a person walks up and tosses something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.9170882500532285,
      "Minus Multimodal Distance": -10.872279167175293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.046346474438905716,
      "MoBERT-F": 0.5715214300264724,
      "MoBERT-N": 0.47798191334319323,
      "MoBERT-min(F/N)": 0.47798191334319323,
      "MoBERT-max(F/N)": 0.5715214300264724,
      "MotionCritic": -12.041952133178711,
      "VeMo (human-opt view)": 0.8442776735459663,
      "VeMo (max entropy view)": 0.8442776735459663,
      "VeMo (min entropy view)": 0.8593238822246456,
      "VeMo (random view)": 0.8593238822246456,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks up and tosses something."
  },
  "001548": {
    "text": "a person holds an object steady with their right arm and strums with their left arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.43993850443221744,
      "Minus Multimodal Distance": -6.039276123046875,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.5258064094232395e-05,
      "MoBERT-F": 0.3078347787529572,
      "MoBERT-N": 0.37948269051792133,
      "MoBERT-min(F/N)": 0.3078347787529572,
      "MoBERT-max(F/N)": 0.37948269051792133,
      "MotionCritic": -10.69571590423584,
      "VeMo (human-opt view)": 5.845796706720791e-05,
      "VeMo (max entropy view)": 0.00027951857988357446,
      "VeMo (min entropy view)": 5.845796706720791e-05,
      "VeMo (random view)": 0.00027951857988357446,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds an object steady with their right arm and strums with their left arm."
  },
  "001567": {
    "text": "a person throws their hands outwards then back in front of them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7592156154550127,
      "Minus Multimodal Distance": -9.555557250976562,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.10402413457632065,
      "MoBERT-F": 0.6543236624855844,
      "MoBERT-N": 0.5060504237295601,
      "MoBERT-min(F/N)": 0.5060504237295601,
      "MoBERT-max(F/N)": 0.6543236624855844,
      "MotionCritic": -5.140169143676758,
      "VeMo (human-opt view)": 0.9604943721032885,
      "VeMo (max entropy view)": 0.9526047786917352,
      "VeMo (min entropy view)": 0.9604943721032885,
      "VeMo (random view)": 0.9604943721032885,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws their hands outwards and then back in front of them."
  },
  "001589": {
    "text": "a person runs on the spot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5438587958958121,
      "Minus Multimodal Distance": -8.763216018676758,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.512072751414962e-05,
      "MoBERT-F": 0.4477505257535652,
      "MoBERT-N": 0.554156505345461,
      "MoBERT-min(F/N)": 0.4477505257535652,
      "MoBERT-max(F/N)": 0.554156505345461,
      "MotionCritic": -10.033733367919922,
      "VeMo (human-opt view)": 0.4538310412573674,
      "VeMo (max entropy view)": 0.4538310412573674,
      "VeMo (min entropy view)": 0.005887510960791683,
      "VeMo (random view)": 0.4538310412573674,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs on the spot."
  },
  "001614": {
    "text": "a person walks while dragging his feet.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4225783072905205,
      "Minus Multimodal Distance": -7.411235332489014,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.004393610637634993,
      "MoBERT-F": 0.6515316764125589,
      "MoBERT-N": 0.6597805379555604,
      "MoBERT-min(F/N)": 0.6515316764125589,
      "MoBERT-max(F/N)": 0.6597805379555604,
      "MotionCritic": -3.6017558574676514,
      "VeMo (human-opt view)": 0.9465868263473054,
      "VeMo (max entropy view)": 0.9097472924187726,
      "VeMo (min entropy view)": 0.9465868263473054,
      "VeMo (random view)": 0.9097472924187726,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks while dragging their feet."
  },
  "001632": {
    "text": "a man sits down and then stays still.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1627999490261498,
      "Minus Multimodal Distance": -10.487774848937988,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.622092794510536e-05,
      "MoBERT-F": 0.44701443028498494,
      "MoBERT-N": 0.4513010025658864,
      "MoBERT-min(F/N)": 0.44701443028498494,
      "MoBERT-max(F/N)": 0.4513010025658864,
      "MotionCritic": -14.186517715454102,
      "VeMo (human-opt view)": 0.9397369226063016,
      "VeMo (max entropy view)": 0.9397369226063016,
      "VeMo (min entropy view)": 0.9723243661699246,
      "VeMo (random view)": 0.9397369226063016,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man sits down and then stays still."
  },
  "001636": {
    "text": "a person steps to their left and sits down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2048505339161932,
      "Minus Multimodal Distance": -10.399820327758789,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.251244404760655e-05,
      "MoBERT-F": 0.427430077361471,
      "MoBERT-N": 0.4619947382663053,
      "MoBERT-min(F/N)": 0.427430077361471,
      "MoBERT-max(F/N)": 0.4619947382663053,
      "MotionCritic": -11.11420726776123,
      "VeMo (human-opt view)": 0.8873720136518771,
      "VeMo (max entropy view)": 0.8441145281018028,
      "VeMo (min entropy view)": 0.8873720136518771,
      "VeMo (random view)": 0.8441145281018028,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps to their left and sits down."
  },
  "001640": {
    "text": "person looks to be washing a window with both hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9338690817274139,
      "Minus Multimodal Distance": -8.183741569519043,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.231051207170822e-05,
      "MoBERT-F": 0.4594896840820991,
      "MoBERT-N": 0.48214569808393465,
      "MoBERT-min(F/N)": 0.4594896840820991,
      "MoBERT-max(F/N)": 0.48214569808393465,
      "MotionCritic": -9.333486557006836,
      "VeMo (human-opt view)": 0.07152496626180836,
      "VeMo (max entropy view)": 0.3769911504424779,
      "VeMo (min entropy view)": 0.07152496626180836,
      "VeMo (random view)": 0.07152496626180836,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person looks to be washing a window with both hands."
  },
  "001648": {
    "text": "a man uses his right hand to throw somthing with force.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9415256490081376,
      "Minus Multimodal Distance": -11.860258102416992,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0010805926285684109,
      "MoBERT-F": 0.5319285819151974,
      "MoBERT-N": 0.466986085200528,
      "MoBERT-min(F/N)": 0.466986085200528,
      "MoBERT-max(F/N)": 0.5319285819151974,
      "MotionCritic": -8.276814460754395,
      "VeMo (human-opt view)": 0.7551401869158878,
      "VeMo (max entropy view)": 0.7551401869158878,
      "VeMo (min entropy view)": 0.7880386983289358,
      "VeMo (random view)": 0.7880386983289358,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man uses his right hand to throw something with force."
  },
  "001664": {
    "text": "losing balance, moving backwards with both feet.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.382731279583448,
      "Minus Multimodal Distance": -7.525134563446045,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0007655738736502826,
      "MoBERT-F": 0.4059849289172456,
      "MoBERT-N": 0.430320886371759,
      "MoBERT-min(F/N)": 0.4059849289172456,
      "MoBERT-max(F/N)": 0.430320886371759,
      "MotionCritic": -9.375770568847656,
      "VeMo (human-opt view)": 0.7186147186147186,
      "VeMo (max entropy view)": 0.6657824933687002,
      "VeMo (min entropy view)": 0.7186147186147186,
      "VeMo (random view)": 0.6657824933687002,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Losing balance and moving backwards with both feet."
  },
  "001675": {
    "text": "person went around to sit on chair.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.7341441871447578,
      "Minus Multimodal Distance": -4.655237674713135,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2645448552793823e-05,
      "MoBERT-F": 0.36808676972347215,
      "MoBERT-N": 0.4665707026036854,
      "MoBERT-min(F/N)": 0.36808676972347215,
      "MoBERT-max(F/N)": 0.4665707026036854,
      "MotionCritic": -8.216313362121582,
      "VeMo (human-opt view)": 0.39232053422370616,
      "VeMo (max entropy view)": 0.5931558935361216,
      "VeMo (min entropy view)": 0.39232053422370616,
      "VeMo (random view)": 0.39232053422370616,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person went around and sat on the chair."
  },
  "001676": {
    "text": "a person places left hand on cround and gets into a seated position on the ground. the person uses left hand to brace themself and gets back to a standing position",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4804211152169595,
      "Minus Multimodal Distance": -10.211694717407227,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2565362087334506e-05,
      "MoBERT-F": 0.32966580929349376,
      "MoBERT-N": 0.48679921154220257,
      "MoBERT-min(F/N)": 0.32966580929349376,
      "MoBERT-max(F/N)": 0.48679921154220257,
      "MotionCritic": -5.266947269439697,
      "VeMo (human-opt view)": 0.02031015507753877,
      "VeMo (max entropy view)": 0.2946058091286307,
      "VeMo (min entropy view)": 0.02031015507753877,
      "VeMo (random view)": 0.02031015507753877,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person places their left hand on the ground and gets into a seated position on the ground. The person uses their left hand to brace themselves and gets back to a standing position."
  },
  "001705": {
    "text": "a person is pitching a baseball.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9370402853903582,
      "Minus Multimodal Distance": -10.923160552978516,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.01578505150973797,
      "MoBERT-F": 0.6575031701154004,
      "MoBERT-N": 0.4920264062932561,
      "MoBERT-min(F/N)": 0.4920264062932561,
      "MoBERT-max(F/N)": 0.6575031701154004,
      "MotionCritic": -8.437158584594727,
      "VeMo (human-opt view)": 0.53156146179402,
      "VeMo (max entropy view)": 0.53125,
      "VeMo (min entropy view)": 0.53156146179402,
      "VeMo (random view)": 0.53156146179402,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is pitching a baseball."
  },
  "001725": {
    "text": "a man limps to the right side of the room, then runs back to the left side",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8157108367103536,
      "Minus Multimodal Distance": -5.236698627471924,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9494867703760974e-05,
      "MoBERT-F": 0.5025343273336538,
      "MoBERT-N": 0.6273908952365902,
      "MoBERT-min(F/N)": 0.5025343273336538,
      "MoBERT-max(F/N)": 0.6273908952365902,
      "MotionCritic": -10.232641220092773,
      "VeMo (human-opt view)": 0.8813114754098361,
      "VeMo (max entropy view)": 0.8591885441527446,
      "VeMo (min entropy view)": 0.8813114754098361,
      "VeMo (random view)": 0.8591885441527446,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man limps to the right side of the room, then runs back to the left side."
  },
  "001738": {
    "text": "a person taps his right hand in the air twice. they then make a rowing movement on each side of their body and then outline a cloud infront of them",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.056524403121223,
      "Minus Multimodal Distance": -11.787382125854492,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.282786590512842e-05,
      "MoBERT-F": 0.354001846007558,
      "MoBERT-N": 0.5287308214718051,
      "MoBERT-min(F/N)": 0.354001846007558,
      "MoBERT-max(F/N)": 0.5287308214718051,
      "MotionCritic": -12.482356071472168,
      "VeMo (human-opt view)": 0.04722792607802875,
      "VeMo (max entropy view)": 0.28193832599118945,
      "VeMo (min entropy view)": 0.04722792607802875,
      "VeMo (random view)": 0.04722792607802875,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person taps his right hand in the air twice. Then, he makes a rowing movement on each side of his body and then outlines a cloud in front of him."
  },
  "001752": {
    "text": "a person stands still then they throw a football",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9655752710895877,
      "Minus Multimodal Distance": -10.126367568969727,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.3607443571090698,
      "MoBERT-F": 0.5738291862739469,
      "MoBERT-N": 0.5341851419077767,
      "MoBERT-min(F/N)": 0.5341851419077767,
      "MoBERT-max(F/N)": 0.5738291862739469,
      "MotionCritic": -3.6607742309570312,
      "VeMo (human-opt view)": 0.0004441385907628934,
      "VeMo (max entropy view)": 0.0007550180207131358,
      "VeMo (min entropy view)": 0.0004441385907628934,
      "VeMo (random view)": 0.0007550180207131358,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands still and then throws a football."
  },
  "001755": {
    "text": "person moves backwards towards the left then right as though he is playing a sport game like football or baseketball",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8311223279735778,
      "Minus Multimodal Distance": -11.717966079711914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.05374632775783539,
      "MoBERT-F": 0.7074762071076359,
      "MoBERT-N": 0.6846314073854134,
      "MoBERT-min(F/N)": 0.6846314073854134,
      "MoBERT-max(F/N)": 0.7074762071076359,
      "MotionCritic": -8.858502388000488,
      "VeMo (human-opt view)": 0.4688995215311005,
      "VeMo (max entropy view)": 0.4688995215311005,
      "VeMo (min entropy view)": 0.26885245901639343,
      "VeMo (random view)": 0.26885245901639343,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves backwards, first towards the left and then to the right as though he is playing a sport such as football or basketball."
  },
  "001759": {
    "text": "he tries to clean the floor",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1130187037493453,
      "Minus Multimodal Distance": -8.729501724243164,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.4782769944285974e-05,
      "MoBERT-F": 0.4954231573485436,
      "MoBERT-N": 0.5731506255940666,
      "MoBERT-min(F/N)": 0.4954231573485436,
      "MoBERT-max(F/N)": 0.5731506255940666,
      "MotionCritic": -1.866812825202942,
      "VeMo (human-opt view)": 0.0011676587019052772,
      "VeMo (max entropy view)": 0.0011676587019052772,
      "VeMo (min entropy view)": 0.0008537626539821929,
      "VeMo (random view)": 0.0008537626539821929,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He tries to clean the floor."
  },
  "001772": {
    "text": "a person walks up to shake with their right hand, turns slightly right to shake again, and turns right again to shake for a final time.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9107865624659868,
      "Minus Multimodal Distance": -3.6873316764831543,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5597302737878636e-05,
      "MoBERT-F": 0.36033476966487177,
      "MoBERT-N": 0.46323654121480945,
      "MoBERT-min(F/N)": 0.36033476966487177,
      "MoBERT-max(F/N)": 0.46323654121480945,
      "MotionCritic": -3.7238945960998535,
      "VeMo (human-opt view)": 0.4532871972318339,
      "VeMo (max entropy view)": 0.4532871972318339,
      "VeMo (min entropy view)": 0.7550371155885471,
      "VeMo (random view)": 0.4532871972318339,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks up and shakes with their right hand, then turns slightly to the right to shake again, and turns right once more to shake for a final time."
  },
  "001775": {
    "text": "a person dribbles a ball with one hand then the other and proceeds to shoot ball into goal with both hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.737971400927475,
      "Minus Multimodal Distance": -7.692399024963379,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.004277391824871302,
      "MoBERT-F": 0.6720769301918661,
      "MoBERT-N": 0.6431301487055929,
      "MoBERT-min(F/N)": 0.6431301487055929,
      "MoBERT-max(F/N)": 0.6720769301918661,
      "MotionCritic": -9.056352615356445,
      "VeMo (human-opt view)": 7.191988979540347e-06,
      "VeMo (max entropy view)": 7.191988979540347e-06,
      "VeMo (min entropy view)": 6.745945445860415e-06,
      "VeMo (random view)": 6.745945445860415e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person dribbles the ball with one hand, then the other, and proceeds to shoot the ball into the goal with both hands."
  },
  "001808": {
    "text": "the person is standing there.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5167735844498027,
      "Minus Multimodal Distance": -10.48556137084961,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.3058215194614604e-05,
      "MoBERT-F": 0.5099476457973285,
      "MoBERT-N": 0.5764595316380774,
      "MoBERT-min(F/N)": 0.5099476457973285,
      "MoBERT-max(F/N)": 0.5764595316380774,
      "MotionCritic": -5.650391101837158,
      "VeMo (human-opt view)": 0.9890099342515717,
      "VeMo (max entropy view)": 0.9808429118773946,
      "VeMo (min entropy view)": 0.9890099342515717,
      "VeMo (random view)": 0.9808429118773946,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing there."
  },
  "001835": {
    "text": "a person standing up strikes their hands together well above their head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6559162210853416,
      "Minus Multimodal Distance": -9.228864669799805,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0648883804678917,
      "MoBERT-F": 0.6322406980249367,
      "MoBERT-N": 0.5249935479854138,
      "MoBERT-min(F/N)": 0.5249935479854138,
      "MoBERT-max(F/N)": 0.6322406980249367,
      "MotionCritic": -14.050776481628418,
      "VeMo (human-opt view)": 0.3069306930693069,
      "VeMo (max entropy view)": 0.3069306930693069,
      "VeMo (min entropy view)": 0.7053571428571429,
      "VeMo (random view)": 0.3069306930693069,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person standing up strikes their hands together well above their head."
  },
  "001840": {
    "text": "someone execures a roundhouse kick with their left foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4422810588084352,
      "Minus Multimodal Distance": -10.79820728302002,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8640419244766235,
      "MoBERT-F": 0.7162570384459466,
      "MoBERT-N": 0.5153433044399892,
      "MoBERT-min(F/N)": 0.5153433044399892,
      "MoBERT-max(F/N)": 0.7162570384459466,
      "MotionCritic": -9.387928009033203,
      "VeMo (human-opt view)": 0.8352490421455939,
      "VeMo (max entropy view)": 0.8352490421455939,
      "VeMo (min entropy view)": 0.8668866886688669,
      "VeMo (random view)": 0.8352490421455939,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone executes a roundhouse kick with their left foot."
  },
  "001843": {
    "text": "a person standing in place lifts and waves with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5350567646643006,
      "Minus Multimodal Distance": -6.3991899490356445,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.54227377101779e-05,
      "MoBERT-F": 0.28129603036708356,
      "MoBERT-N": 0.39657377489745,
      "MoBERT-min(F/N)": 0.28129603036708356,
      "MoBERT-max(F/N)": 0.39657377489745,
      "MotionCritic": -6.432891845703125,
      "VeMo (human-opt view)": 0.9966235539401096,
      "VeMo (max entropy view)": 0.9933032506538672,
      "VeMo (min entropy view)": 0.9966235539401096,
      "VeMo (random view)": 0.9933032506538672,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing in place lifts and waves with their right hand."
  },
  "001859": {
    "text": "a person walks forward, leans over an object, then stands back up and walks backward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9529688484954928,
      "Minus Multimodal Distance": -12.62610912322998,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.13815851509571075,
      "MoBERT-F": 0.667859006401651,
      "MoBERT-N": 0.5952381538952028,
      "MoBERT-min(F/N)": 0.5952381538952028,
      "MoBERT-max(F/N)": 0.667859006401651,
      "MotionCritic": 0.2781353294849396,
      "VeMo (human-opt view)": 0.6073131955484896,
      "VeMo (max entropy view)": 0.6073131955484896,
      "VeMo (min entropy view)": 0.651356993736952,
      "VeMo (random view)": 0.6073131955484896,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, leans over an object, then stands back up and walks backward."
  },
  "001882": {
    "text": "a person walks forward then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9461141030184689,
      "Minus Multimodal Distance": -12.576125144958496,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6258090656483546e-05,
      "MoBERT-F": 0.5015553449485034,
      "MoBERT-N": 0.6280661221957105,
      "MoBERT-min(F/N)": 0.5015553449485034,
      "MoBERT-max(F/N)": 0.6280661221957105,
      "MotionCritic": -7.456054210662842,
      "VeMo (human-opt view)": 0.9972123248472317,
      "VeMo (max entropy view)": 0.9919342084453582,
      "VeMo (min entropy view)": 0.9972123248472317,
      "VeMo (random view)": 0.9919342084453582,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and then stops."
  },
  "001888": {
    "text": "the person is kneeling down on all fours to begin to crawl",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.508553229169267,
      "Minus Multimodal Distance": -5.433490753173828,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.2201891839504242,
      "MoBERT-F": 0.6550007775556528,
      "MoBERT-N": 0.5890154774357195,
      "MoBERT-min(F/N)": 0.5890154774357195,
      "MoBERT-max(F/N)": 0.6550007775556528,
      "MotionCritic": -7.775049686431885,
      "VeMo (human-opt view)": 0.9970282592243941,
      "VeMo (max entropy view)": 0.9966291601386317,
      "VeMo (min entropy view)": 0.9970282592243941,
      "VeMo (random view)": 0.9970282592243941,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is kneeling down on all fours to begin crawling."
  },
  "001897": {
    "text": "person aims and throws a baseball",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3224309194966588,
      "Minus Multimodal Distance": -5.357763290405273,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9030104875564575,
      "MoBERT-F": 0.7535251686484872,
      "MoBERT-N": 0.627342580877643,
      "MoBERT-min(F/N)": 0.627342580877643,
      "MoBERT-max(F/N)": 0.7535251686484872,
      "MotionCritic": -11.462610244750977,
      "VeMo (human-opt view)": 0.3784355179704017,
      "VeMo (max entropy view)": 0.3784355179704017,
      "VeMo (min entropy view)": 0.71939736346516,
      "VeMo (random view)": 0.71939736346516,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person aims and throws a baseball."
  },
  "001906": {
    "text": "the person was laying  down and then they got up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.839781016770383,
      "Minus Multimodal Distance": -11.631519317626953,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.758597292995546e-05,
      "MoBERT-F": 0.5039345070982671,
      "MoBERT-N": 0.5181361868713454,
      "MoBERT-min(F/N)": 0.5039345070982671,
      "MoBERT-max(F/N)": 0.5181361868713454,
      "MotionCritic": -4.227020740509033,
      "VeMo (human-opt view)": 0.9992885609413661,
      "VeMo (max entropy view)": 0.9989690150213403,
      "VeMo (min entropy view)": 0.9992885609413661,
      "VeMo (random view)": 0.9989690150213403,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was lying down and then they got up."
  },
  "001983": {
    "text": "a person walks and steps over something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.324021018094489,
      "Minus Multimodal Distance": -11.807147026062012,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.738370262202807e-05,
      "MoBERT-F": 0.4193973047001198,
      "MoBERT-N": 0.5787474813371348,
      "MoBERT-min(F/N)": 0.4193973047001198,
      "MoBERT-max(F/N)": 0.5787474813371348,
      "MotionCritic": -1.9730552434921265,
      "VeMo (human-opt view)": 0.9241179313678105,
      "VeMo (max entropy view)": 0.9196956889264581,
      "VeMo (min entropy view)": 0.9241179313678105,
      "VeMo (random view)": 0.9196956889264581,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks and steps over something."
  },
  "002009": {
    "text": "a person is using his left arm moving it up and down leaving his elbow still.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3402198371124414,
      "Minus Multimodal Distance": -11.47662353515625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00020414592290762812,
      "MoBERT-F": 0.5372695392608023,
      "MoBERT-N": 0.5683306116724954,
      "MoBERT-min(F/N)": 0.5372695392608023,
      "MoBERT-max(F/N)": 0.5683306116724954,
      "MotionCritic": -10.660270690917969,
      "VeMo (human-opt view)": 0.7658767772511849,
      "VeMo (max entropy view)": 0.7658767772511849,
      "VeMo (min entropy view)": 0.7671232876712328,
      "VeMo (random view)": 0.7658767772511849,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is using his left arm, moving it up and down while keeping his elbow still."
  },
  "002024": {
    "text": "a person doing air kicks with his right feet.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1687406752107183,
      "Minus Multimodal Distance": -9.721760749816895,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6000390789704397e-05,
      "MoBERT-F": 0.38719326918263364,
      "MoBERT-N": 0.46136924175622945,
      "MoBERT-min(F/N)": 0.38719326918263364,
      "MoBERT-max(F/N)": 0.46136924175622945,
      "MotionCritic": -4.577666282653809,
      "VeMo (human-opt view)": 0.7980769230769231,
      "VeMo (max entropy view)": 0.7980769230769231,
      "VeMo (min entropy view)": 0.8595988538681948,
      "VeMo (random view)": 0.8595988538681948,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is doing air kicks with his right foot."
  },
  "002055": {
    "text": "a person bends at the waist, picks something up, then puts it down at waist level",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8623827304875908,
      "Minus Multimodal Distance": -10.319519996643066,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.97320875688456e-05,
      "MoBERT-F": 0.4895631601855637,
      "MoBERT-N": 0.5381931034847325,
      "MoBERT-min(F/N)": 0.4895631601855637,
      "MoBERT-max(F/N)": 0.5381931034847325,
      "MotionCritic": -3.237635612487793,
      "VeMo (human-opt view)": 0.608130081300813,
      "VeMo (max entropy view)": 0.608130081300813,
      "VeMo (min entropy view)": 0.6666666666666666,
      "VeMo (random view)": 0.608130081300813,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends at the waist, picks something up, and then puts it down at waist level."
  },
  "002087": {
    "text": "a person walks forward while holding out their arms for balance",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9224085618810722,
      "Minus Multimodal Distance": -4.913389682769775,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.3628064304357395e-05,
      "MoBERT-F": 0.39381170949475675,
      "MoBERT-N": 0.5107634403694211,
      "MoBERT-min(F/N)": 0.39381170949475675,
      "MoBERT-max(F/N)": 0.5107634403694211,
      "MotionCritic": -4.021788597106934,
      "VeMo (human-opt view)": 0.9919369288657947,
      "VeMo (max entropy view)": 0.9705433122408903,
      "VeMo (min entropy view)": 0.9919369288657947,
      "VeMo (random view)": 0.9705433122408903,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward while holding out their arms for balance."
  },
  "002103": {
    "text": "a man steps forward and does a handstand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3442715068265148,
      "Minus Multimodal Distance": -9.321019172668457,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.2796681225299835,
      "MoBERT-F": 0.6028146622079948,
      "MoBERT-N": 0.5552606734066037,
      "MoBERT-min(F/N)": 0.5552606734066037,
      "MoBERT-max(F/N)": 0.6028146622079948,
      "MotionCritic": -4.707739353179932,
      "VeMo (human-opt view)": 0.9951209050453059,
      "VeMo (max entropy view)": 0.9919465126880413,
      "VeMo (min entropy view)": 0.9951209050453059,
      "VeMo (random view)": 0.9951209050453059,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man steps forward and does a handstand."
  },
  "002122": {
    "text": "a person stretches their back by turning their arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2276509512968632,
      "Minus Multimodal Distance": -8.914109230041504,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0265203844755888,
      "MoBERT-F": 0.6538771560993951,
      "MoBERT-N": 0.5916047860170043,
      "MoBERT-min(F/N)": 0.5916047860170043,
      "MoBERT-max(F/N)": 0.6538771560993951,
      "MotionCritic": -6.388038635253906,
      "VeMo (human-opt view)": 0.8806539509536785,
      "VeMo (max entropy view)": 0.8806539509536785,
      "VeMo (min entropy view)": 0.928537170263789,
      "VeMo (random view)": 0.8806539509536785,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stretches their back by rotating their arms."
  },
  "002139": {
    "text": "it looks like he is lifting a box on a shelf and grabbing another box to put on shelf as well.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.111103818196945,
      "Minus Multimodal Distance": -7.582625865936279,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.3221247071633115e-05,
      "MoBERT-F": 0.4727294741365804,
      "MoBERT-N": 0.4801562769599839,
      "MoBERT-min(F/N)": 0.4727294741365804,
      "MoBERT-max(F/N)": 0.4801562769599839,
      "MotionCritic": -5.533054828643799,
      "VeMo (human-opt view)": 0.00011225644360895556,
      "VeMo (max entropy view)": 0.0002036415573836802,
      "VeMo (min entropy view)": 0.00011225644360895556,
      "VeMo (random view)": 0.0002036415573836802,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "It looks like he is lifting a box onto a shelf and grabbing another box to put on the shelf as well."
  },
  "002149": {
    "text": "a person steps forward, turns to the side and gestures several times.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5055224235399007,
      "Minus Multimodal Distance": -3.609739303588867,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.343812411709223e-05,
      "MoBERT-F": 0.36375526244290085,
      "MoBERT-N": 0.4852445423095325,
      "MoBERT-min(F/N)": 0.36375526244290085,
      "MoBERT-max(F/N)": 0.4852445423095325,
      "MotionCritic": -7.4454426765441895,
      "VeMo (human-opt view)": 0.00026068085855323784,
      "VeMo (max entropy view)": 0.002310743231591654,
      "VeMo (min entropy view)": 0.00026068085855323784,
      "VeMo (random view)": 0.00026068085855323784,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person steps forward, turns to the side, and gestures several times."
  },
  "002168": {
    "text": "a person stands up straight from a bent over position, touches his head with his left arm, then bends his torso to the right and swings forward with his arms dangling in front of him. i",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0603484483268868,
      "Minus Multimodal Distance": -11.076593399047852,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.03434699162608e-05,
      "MoBERT-F": 0.41574549229188984,
      "MoBERT-N": 0.47261055148225783,
      "MoBERT-min(F/N)": 0.41574549229188984,
      "MoBERT-max(F/N)": 0.47261055148225783,
      "MotionCritic": -7.3710198402404785,
      "VeMo (human-opt view)": 0.03956343792633015,
      "VeMo (max entropy view)": 0.03956343792633015,
      "VeMo (min entropy view)": 0.0058956916099773245,
      "VeMo (random view)": 0.03956343792633015,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands up straight from a bent - over position, touches their head with their left arm, then bends their torso to the right and swings forward with their arms dangling in front of them."
  },
  "002171": {
    "text": "a person who sits down on there knees",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.457280208813807,
      "Minus Multimodal Distance": -3.43886661529541,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4808543457766064e-05,
      "MoBERT-F": 0.34804209006367903,
      "MoBERT-N": 0.40019601306423497,
      "MoBERT-min(F/N)": 0.34804209006367903,
      "MoBERT-max(F/N)": 0.40019601306423497,
      "MotionCritic": -4.3452982902526855,
      "VeMo (human-opt view)": 0.9433419614610297,
      "VeMo (max entropy view)": 0.9433419614610297,
      "VeMo (min entropy view)": 0.9875407322215833,
      "VeMo (random view)": 0.9433419614610297,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who gets down on their knees"
  },
  "002178": {
    "text": "the person tripped over his foot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4895665664048139,
      "Minus Multimodal Distance": -15.74508285522461,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3288575903279707e-05,
      "MoBERT-F": 0.4373624672050427,
      "MoBERT-N": 0.5775156587045238,
      "MoBERT-min(F/N)": 0.4373624672050427,
      "MoBERT-max(F/N)": 0.5775156587045238,
      "MotionCritic": -11.18388557434082,
      "VeMo (human-opt view)": 0.852037169406719,
      "VeMo (max entropy view)": 0.8173913043478261,
      "VeMo (min entropy view)": 0.852037169406719,
      "VeMo (random view)": 0.852037169406719,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person tripped over his foot."
  },
  "002209": {
    "text": "a man walks in a forward arc, skewing to the right side of the screen.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8981818366733811,
      "Minus Multimodal Distance": -8.885366439819336,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3120308469515294e-05,
      "MoBERT-F": 0.39799448442765556,
      "MoBERT-N": 0.5090126550718166,
      "MoBERT-min(F/N)": 0.39799448442765556,
      "MoBERT-max(F/N)": 0.5090126550718166,
      "MotionCritic": -12.65336799621582,
      "VeMo (human-opt view)": 0.8181818181818182,
      "VeMo (max entropy view)": 0.8181818181818182,
      "VeMo (min entropy view)": 0.8599605522682445,
      "VeMo (random view)": 0.8599605522682445,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man walks in a forward arc, skewing to the right side of the screen."
  },
  "002235": {
    "text": "person leans forward goes onto knees whilst first putting left hand on ground for support and stays on knees",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7276378388266855,
      "Minus Multimodal Distance": -6.276455402374268,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7993852199870162e-05,
      "MoBERT-F": 0.43717068714898655,
      "MoBERT-N": 0.5957550601935495,
      "MoBERT-min(F/N)": 0.43717068714898655,
      "MoBERT-max(F/N)": 0.5957550601935495,
      "MotionCritic": -6.689728736877441,
      "VeMo (human-opt view)": 0.9668698648290485,
      "VeMo (max entropy view)": 0.9284253578732107,
      "VeMo (min entropy view)": 0.9668698648290485,
      "VeMo (random view)": 0.9668698648290485,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person leans forward, goes down onto their knees, first placing their left hand on the ground for support, and then remains on their knees."
  },
  "002242": {
    "text": "a person crawling from right to left and vice versa",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9934202924223858,
      "Minus Multimodal Distance": -3.8967995643615723,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6165376766584814e-05,
      "MoBERT-F": 0.3256997423238756,
      "MoBERT-N": 0.40543770247911554,
      "MoBERT-min(F/N)": 0.3256997423238756,
      "MoBERT-max(F/N)": 0.40543770247911554,
      "MotionCritic": -5.544504165649414,
      "VeMo (human-opt view)": 0.00020435503281033583,
      "VeMo (max entropy view)": 0.002792236891999035,
      "VeMo (min entropy view)": 0.00020435503281033583,
      "VeMo (random view)": 0.00020435503281033583,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person crawling from right to left and vice versa."
  },
  "002246": {
    "text": "a person walks up stairs turns left and walks back down stairs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4705432460212857,
      "Minus Multimodal Distance": -8.489014625549316,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00290817697532475,
      "MoBERT-F": 0.5180694229572056,
      "MoBERT-N": 0.5923353170632366,
      "MoBERT-min(F/N)": 0.5180694229572056,
      "MoBERT-max(F/N)": 0.5923353170632366,
      "MotionCritic": -16.34882354736328,
      "VeMo (human-opt view)": 0.000754047460634287,
      "VeMo (max entropy view)": 0.000754047460634287,
      "VeMo (min entropy view)": 0.00026011138468335894,
      "VeMo (random view)": 0.000754047460634287,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks up the stairs, turns left, and walks back down the stairs."
  },
  "002247": {
    "text": "a person walking and helping maintain their balance and support,  from holding onto a side rail or wall.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2407394926475146,
      "Minus Multimodal Distance": -8.9774751663208,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.6665627451147884e-05,
      "MoBERT-F": 0.44135843195994967,
      "MoBERT-N": 0.5562955820868405,
      "MoBERT-min(F/N)": 0.44135843195994967,
      "MoBERT-max(F/N)": 0.5562955820868405,
      "MotionCritic": -10.78518295288086,
      "VeMo (human-opt view)": 0.665680473372781,
      "VeMo (max entropy view)": 0.6656626506024096,
      "VeMo (min entropy view)": 0.665680473372781,
      "VeMo (random view)": 0.6656626506024096,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking and helps maintain their balance and support by holding onto a side rail or wall."
  },
  "002259": {
    "text": "a person stands using his left hand to play a guitar moving it up and down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.43594736607795653,
      "Minus Multimodal Distance": -10.896614074707031,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3387618057313375e-05,
      "MoBERT-F": 0.3624559960808761,
      "MoBERT-N": 0.4883517367417303,
      "MoBERT-min(F/N)": 0.3624559960808761,
      "MoBERT-max(F/N)": 0.4883517367417303,
      "MotionCritic": -2.8894896507263184,
      "VeMo (human-opt view)": 7.191590666666083e-06,
      "VeMo (max entropy view)": 1.2634506469936387e-05,
      "VeMo (min entropy view)": 7.191590666666083e-06,
      "VeMo (random view)": 1.2634506469936387e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands and uses his left hand to play the guitar, moving it up and down."
  },
  "002272": {
    "text": "someone getting into position to start catching and tossing an object.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0197515831914299,
      "Minus Multimodal Distance": -4.431222438812256,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.005904554855078459,
      "MoBERT-F": 0.6430985583880615,
      "MoBERT-N": 0.5536165703111684,
      "MoBERT-min(F/N)": 0.5536165703111684,
      "MoBERT-max(F/N)": 0.6430985583880615,
      "MotionCritic": -9.144757270812988,
      "VeMo (human-opt view)": 0.7052767052767053,
      "VeMo (max entropy view)": 0.7052767052767053,
      "VeMo (min entropy view)": 0.7881773399014779,
      "VeMo (random view)": 0.7881773399014779,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is getting into position to start catching and tossing an object."
  },
  "002315": {
    "text": "a stick fogure holds their arms straight up and then quickly puts them back down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3241213221409807,
      "Minus Multimodal Distance": -5.562957286834717,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 9.324813436251134e-05,
      "MoBERT-F": 0.4355429530098874,
      "MoBERT-N": 0.49598468263160056,
      "MoBERT-min(F/N)": 0.4355429530098874,
      "MoBERT-max(F/N)": 0.49598468263160056,
      "MotionCritic": -14.463103294372559,
      "VeMo (human-opt view)": 0.13333333333333333,
      "VeMo (max entropy view)": 0.3346456692913386,
      "VeMo (min entropy view)": 0.13333333333333333,
      "VeMo (random view)": 0.3346456692913386,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A stick figure holds its arms straight up and then quickly puts them back down."
  },
  "002317": {
    "text": "a person steps around something then sits down on the item and rests each hand on the corresponding thigh",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.8945061401688876,
      "Minus Multimodal Distance": -9.528010368347168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9586592063424177e-05,
      "MoBERT-F": 0.38451156205140347,
      "MoBERT-N": 0.4884179680578108,
      "MoBERT-min(F/N)": 0.38451156205140347,
      "MoBERT-max(F/N)": 0.4884179680578108,
      "MotionCritic": -6.4027204513549805,
      "VeMo (human-opt view)": 0.9148191365227538,
      "VeMo (max entropy view)": 0.9148191365227538,
      "VeMo (min entropy view)": 0.9432782492184011,
      "VeMo (random view)": 0.9432782492184011,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps around something, then sits down on the item and rests each hand on the corresponding thigh."
  },
  "002318": {
    "text": "pacing back and forth from left to right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.513686249499158,
      "Minus Multimodal Distance": -5.2983174324035645,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.11647146195173264,
      "MoBERT-F": 0.6783647305557274,
      "MoBERT-N": 0.6826450485908275,
      "MoBERT-min(F/N)": 0.6783647305557274,
      "MoBERT-max(F/N)": 0.6826450485908275,
      "MotionCritic": -9.773232460021973,
      "VeMo (human-opt view)": 0.989040758527275,
      "VeMo (max entropy view)": 0.9850079744816587,
      "VeMo (min entropy view)": 0.989040758527275,
      "VeMo (random view)": 0.9850079744816587,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Pacing back and forth from left to right."
  },
  "002323": {
    "text": "a person standing up throws something forward from above their head, then throws something again forward from above their head with more force which makes them take one step forward with their right foot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1984845036811622,
      "Minus Multimodal Distance": -3.917034864425659,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.3568208386423066e-05,
      "MoBERT-F": 0.5780895207191508,
      "MoBERT-N": 0.5043586544197021,
      "MoBERT-min(F/N)": 0.5043586544197021,
      "MoBERT-max(F/N)": 0.5780895207191508,
      "MotionCritic": -6.884141445159912,
      "VeMo (human-opt view)": 0.11291139240506329,
      "VeMo (max entropy view)": 0.5615942028985508,
      "VeMo (min entropy view)": 0.11291139240506329,
      "VeMo (random view)": 0.11291139240506329,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing up throws something forward from above their head. Then, they throw something again forward from above their head with more force, causing them to take one step forward with their right foot."
  },
  "002332": {
    "text": "a person walks straight forward quickly",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2264377035391487,
      "Minus Multimodal Distance": -9.158181190490723,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3177968614618294e-05,
      "MoBERT-F": 0.39836822257621984,
      "MoBERT-N": 0.5413621715896477,
      "MoBERT-min(F/N)": 0.39836822257621984,
      "MoBERT-max(F/N)": 0.5413621715896477,
      "MotionCritic": -4.590198040008545,
      "VeMo (human-opt view)": 0.8178025034770514,
      "VeMo (max entropy view)": 0.8080808080808081,
      "VeMo (min entropy view)": 0.8178025034770514,
      "VeMo (random view)": 0.8178025034770514,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks straight forward quickly."
  },
  "002357": {
    "text": "a headless line figure takes four steps forward, down a ramp, toward the viewer.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.061434557739369,
      "Minus Multimodal Distance": -6.742183208465576,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6221405278192833e-05,
      "MoBERT-F": 0.4515669569037055,
      "MoBERT-N": 0.5256751245732219,
      "MoBERT-min(F/N)": 0.4515669569037055,
      "MoBERT-max(F/N)": 0.5256751245732219,
      "MotionCritic": -7.055450439453125,
      "VeMo (human-opt view)": 0.23439099283520984,
      "VeMo (max entropy view)": 0.23439099283520984,
      "VeMo (min entropy view)": 0.23404255319148937,
      "VeMo (random view)": 0.23439099283520984,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure takes four steps forward, down a ramp, toward the viewer."
  },
  "002364": {
    "text": "a person standing forward doing leg kicks.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7737714981301836,
      "Minus Multimodal Distance": -5.9852776527404785,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4101576855173334e-05,
      "MoBERT-F": 0.3750828236444866,
      "MoBERT-N": 0.5738089103316629,
      "MoBERT-min(F/N)": 0.3750828236444866,
      "MoBERT-max(F/N)": 0.5738089103316629,
      "MotionCritic": -13.333396911621094,
      "VeMo (human-opt view)": 3.319345682013108e-05,
      "VeMo (max entropy view)": 0.002472320756745136,
      "VeMo (min entropy view)": 3.319345682013108e-05,
      "VeMo (random view)": 3.319345682013108e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing facing forward and doing leg kicks."
  },
  "002383": {
    "text": "person walks forward slowly and hesitates before standing upright",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6237720552479346,
      "Minus Multimodal Distance": -5.28696346282959,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8097088943468407e-05,
      "MoBERT-F": 0.49257753560113404,
      "MoBERT-N": 0.5691963142513718,
      "MoBERT-min(F/N)": 0.49257753560113404,
      "MoBERT-max(F/N)": 0.5691963142513718,
      "MotionCritic": -4.381289482116699,
      "VeMo (human-opt view)": 0.9820566761539413,
      "VeMo (max entropy view)": 0.9627965043695381,
      "VeMo (min entropy view)": 0.9820566761539413,
      "VeMo (random view)": 0.9627965043695381,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walks forward slowly and hesitates before standing upright."
  },
  "002388": {
    "text": "someone carefully looks behind them while backing up, then uses both hands to dip into a slight sitting motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2750424597622607,
      "Minus Multimodal Distance": -11.92905044555664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5622167587280273,
      "MoBERT-F": 0.7415455540724043,
      "MoBERT-N": 0.5122086937590029,
      "MoBERT-min(F/N)": 0.5122086937590029,
      "MoBERT-max(F/N)": 0.7415455540724043,
      "MotionCritic": -6.201162815093994,
      "VeMo (human-opt view)": 0.42290748898678415,
      "VeMo (max entropy view)": 0.42290748898678415,
      "VeMo (min entropy view)": 0.26877470355731226,
      "VeMo (random view)": 0.42290748898678415,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone carefully looks behind themselves while backing up, then uses both hands to dip into a slight sitting motion."
  },
  "002397": {
    "text": "a person paces from left to right and vice versa.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6332351006735228,
      "Minus Multimodal Distance": -10.839173316955566,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8154529929161072,
      "MoBERT-F": 0.7272321651260578,
      "MoBERT-N": 0.6026441092709947,
      "MoBERT-min(F/N)": 0.6026441092709947,
      "MoBERT-max(F/N)": 0.7272321651260578,
      "MotionCritic": -7.2051100730896,
      "VeMo (human-opt view)": 0.9883105697059186,
      "VeMo (max entropy view)": 0.9769706540334254,
      "VeMo (min entropy view)": 0.9883105697059186,
      "VeMo (random view)": 0.9883105697059186,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person paces from left to right and vice versa."
  },
  "002470": {
    "text": "a person walks clockwise from the 3 o'clock position to the 11 o'clock position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.704925297591635,
      "Minus Multimodal Distance": -12.023841857910156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.14282524585723877,
      "MoBERT-F": 0.6613315182703182,
      "MoBERT-N": 0.6256652155880664,
      "MoBERT-min(F/N)": 0.6256652155880664,
      "MoBERT-max(F/N)": 0.6613315182703182,
      "MotionCritic": -11.584304809570312,
      "VeMo (human-opt view)": 0.5463576158940397,
      "VeMo (max entropy view)": 0.4682203389830508,
      "VeMo (min entropy view)": 0.5463576158940397,
      "VeMo (random view)": 0.4682203389830508,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks clockwise from the 3 o'clock position to the 11 o'clock position."
  },
  "002486": {
    "text": "a person is walking forwards, but stumbles and steps back, then carries on forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1193089929013371,
      "Minus Multimodal Distance": -5.9842705726623535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.002033696975558996,
      "MoBERT-F": 0.6496176872478574,
      "MoBERT-N": 0.6248185059857178,
      "MoBERT-min(F/N)": 0.6248185059857178,
      "MoBERT-max(F/N)": 0.6496176872478574,
      "MotionCritic": -7.3081207275390625,
      "VeMo (human-opt view)": 0.9604943721032885,
      "VeMo (max entropy view)": 0.9579572969403478,
      "VeMo (min entropy view)": 0.9604943721032885,
      "VeMo (random view)": 0.9604943721032885,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking forward, but stumbles and steps back, then carries on forward."
  },
  "002514": {
    "text": "a person holds both their hands up as if to look at something closely.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5640231192897234,
      "Minus Multimodal Distance": -6.705857753753662,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3760758267599158e-05,
      "MoBERT-F": 0.37747913675498745,
      "MoBERT-N": 0.46114307566974716,
      "MoBERT-min(F/N)": 0.37747913675498745,
      "MoBERT-max(F/N)": 0.46114307566974716,
      "MotionCritic": -8.130825996398926,
      "VeMo (human-opt view)": 0.9867722638806976,
      "VeMo (max entropy view)": 0.9867722638806976,
      "VeMo (min entropy view)": 0.9928721174004193,
      "VeMo (random view)": 0.9867722638806976,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds both hands up as if to look at something closely."
  },
  "002517": {
    "text": "the figure stays put but moves arms upward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7695225278256742,
      "Minus Multimodal Distance": -6.046585559844971,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.7899299059063196e-05,
      "MoBERT-F": 0.42960942478429176,
      "MoBERT-N": 0.5071650461641799,
      "MoBERT-min(F/N)": 0.42960942478429176,
      "MoBERT-max(F/N)": 0.5071650461641799,
      "MotionCritic": -6.149364471435547,
      "VeMo (human-opt view)": 0.9048275862068965,
      "VeMo (max entropy view)": 0.9048275862068965,
      "VeMo (min entropy view)": 0.9152129817444219,
      "VeMo (random view)": 0.9048275862068965,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The figure stays put but moves its arms upward."
  },
  "002530": {
    "text": "a person walks towards the camera.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8742859866782513,
      "Minus Multimodal Distance": -4.0676188468933105,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00010996440687449649,
      "MoBERT-F": 0.5557513374682319,
      "MoBERT-N": 0.6141016304681222,
      "MoBERT-min(F/N)": 0.5557513374682319,
      "MoBERT-max(F/N)": 0.6141016304681222,
      "MotionCritic": -0.46216216683387756,
      "VeMo (human-opt view)": 0.717607973421927,
      "VeMo (max entropy view)": 0.33457943925233646,
      "VeMo (min entropy view)": 0.717607973421927,
      "VeMo (random view)": 0.33457943925233646,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks towards the camera."
  },
  "002542": {
    "text": "a man leans forward to pick up an object slightly to his left, and places it down slightly to his right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.40390120769340876,
      "Minus Multimodal Distance": -9.66549015045166,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5597497369744815e-05,
      "MoBERT-F": 0.40512221617653754,
      "MoBERT-N": 0.5832253215605165,
      "MoBERT-min(F/N)": 0.40512221617653754,
      "MoBERT-max(F/N)": 0.5832253215605165,
      "MotionCritic": -1.1319953203201294,
      "VeMo (human-opt view)": 0.8870523415977961,
      "VeMo (max entropy view)": 0.717741935483871,
      "VeMo (min entropy view)": 0.8870523415977961,
      "VeMo (random view)": 0.717741935483871,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man leans forward to pick up an object slightly to his left and places it down slightly to his right."
  },
  "002544": {
    "text": "a person lifts up their arm at a 120 degree angle twice and then reverts their arm to the opposite lower part of their body.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.863791411551409,
      "Minus Multimodal Distance": -9.942902565002441,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.393455542915035e-05,
      "MoBERT-F": 0.3497188613618578,
      "MoBERT-N": 0.42316427185775823,
      "MoBERT-min(F/N)": 0.3497188613618578,
      "MoBERT-max(F/N)": 0.42316427185775823,
      "MotionCritic": -0.8856472373008728,
      "VeMo (human-opt view)": 0.9151157512482978,
      "VeMo (max entropy view)": 0.9151157512482978,
      "VeMo (min entropy view)": 0.9284064665127021,
      "VeMo (random view)": 0.9284064665127021,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts their arm up at a 120 - degree angle twice and then reverts their arm to the opposite lower part of their body."
  },
  "002550": {
    "text": "a person walks forward in an askew line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9207775285554579,
      "Minus Multimodal Distance": -15.277304649353027,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.7779638660140336e-05,
      "MoBERT-F": 0.5571525179928175,
      "MoBERT-N": 0.6438410014709884,
      "MoBERT-min(F/N)": 0.5571525179928175,
      "MoBERT-max(F/N)": 0.6438410014709884,
      "MotionCritic": -7.320993900299072,
      "VeMo (human-opt view)": 0.9364548494983278,
      "VeMo (max entropy view)": 0.9364548494983278,
      "VeMo (min entropy view)": 0.9466840052015605,
      "VeMo (random view)": 0.9364548494983278,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward in an askew line."
  },
  "002606": {
    "text": "a man is shadowboxing while standing still.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8194782926502815,
      "Minus Multimodal Distance": -6.595427989959717,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3877057174104266e-05,
      "MoBERT-F": 0.4209779137950497,
      "MoBERT-N": 0.49619581652541245,
      "MoBERT-min(F/N)": 0.4209779137950497,
      "MoBERT-max(F/N)": 0.49619581652541245,
      "MotionCritic": -10.19146728515625,
      "VeMo (human-opt view)": 0.9283551967709385,
      "VeMo (max entropy view)": 0.9283551967709385,
      "VeMo (min entropy view)": 0.9363402797602055,
      "VeMo (random view)": 0.9363402797602055,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is shadowboxing while standing still."
  },
  "002627": {
    "text": "a person dancing, starting in a t pose, the. turns, continues to dance and finishes with another turn",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8460338322828487,
      "Minus Multimodal Distance": -10.062422752380371,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4530796508770436e-05,
      "MoBERT-F": 0.35657428293581056,
      "MoBERT-N": 0.4934478787157405,
      "MoBERT-min(F/N)": 0.35657428293581056,
      "MoBERT-max(F/N)": 0.4934478787157405,
      "MotionCritic": -12.673690795898438,
      "VeMo (human-opt view)": 0.004618719954755397,
      "VeMo (max entropy view)": 0.0052282040412604215,
      "VeMo (min entropy view)": 0.004618719954755397,
      "VeMo (random view)": 0.004618719954755397,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is dancing. They start in a T - pose, then turn, continue to dance, and finish with another turn."
  },
  "002647": {
    "text": "person walks on hands and knees then stands up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9408914247615572,
      "Minus Multimodal Distance": -5.468052387237549,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.724198854295537e-05,
      "MoBERT-F": 0.3082178274646986,
      "MoBERT-N": 0.3255302918738398,
      "MoBERT-min(F/N)": 0.3082178274646986,
      "MoBERT-max(F/N)": 0.3255302918738398,
      "MotionCritic": -6.10911226272583,
      "VeMo (human-opt view)": 0.4228456913827655,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.4228456913827655,
      "VeMo (random view)": 0.4228456913827655,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks on their hands and knees and then stands up."
  },
  "002656": {
    "text": "a person pers someing with right hand then scratches something with left have and then steps backwards",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0809337966132597,
      "Minus Multimodal Distance": -3.7906432151794434,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0022847503423690796,
      "MoBERT-F": 0.45276710133086273,
      "MoBERT-N": 0.46703164281210285,
      "MoBERT-min(F/N)": 0.45276710133086273,
      "MoBERT-max(F/N)": 0.46703164281210285,
      "MotionCritic": -13.34552001953125,
      "VeMo (human-opt view)": 0.6226415094339622,
      "VeMo (max entropy view)": 0.6226415094339622,
      "VeMo (min entropy view)": 0.37658227848101267,
      "VeMo (random view)": 0.37658227848101267,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does something with the right hand, then scratches something with the left hand, and then steps backwards."
  },
  "002658": {
    "text": "person swings their right arm and then has an injury on the elbow.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.825773209882559,
      "Minus Multimodal Distance": -9.450661659240723,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00010986995766870677,
      "MoBERT-F": 0.5402475502190645,
      "MoBERT-N": 0.5961113118654308,
      "MoBERT-min(F/N)": 0.5402475502190645,
      "MoBERT-max(F/N)": 0.5961113118654308,
      "MotionCritic": -5.670063018798828,
      "VeMo (human-opt view)": 0.5623003194888179,
      "VeMo (max entropy view)": 0.5623003194888179,
      "VeMo (min entropy view)": 0.5771604938271605,
      "VeMo (random view)": 0.5771604938271605,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person swings their right arm and then sustains an injury to the elbow."
  },
  "002661": {
    "text": "a person throws something upwards with two hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3686018826615611,
      "Minus Multimodal Distance": -7.7505903244018555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3187633156776428,
      "MoBERT-F": 0.6425043296941383,
      "MoBERT-N": 0.5545105015961552,
      "MoBERT-min(F/N)": 0.5545105015961552,
      "MoBERT-max(F/N)": 0.6425043296941383,
      "MotionCritic": -3.410940647125244,
      "VeMo (human-opt view)": 0.01169864295741694,
      "VeMo (max entropy view)": 0.01169864295741694,
      "VeMo (min entropy view)": 0.007141843636191647,
      "VeMo (random view)": 0.01169864295741694,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person throws something upward with both hands."
  },
  "002662": {
    "text": "a man is crab walking backwards and then rolls to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7953488581908505,
      "Minus Multimodal Distance": -11.116971015930176,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.017639687284827232,
      "MoBERT-F": 0.42750367908777087,
      "MoBERT-N": 0.43191830572078627,
      "MoBERT-min(F/N)": 0.42750367908777087,
      "MoBERT-max(F/N)": 0.43191830572078627,
      "MotionCritic": -2.1504151821136475,
      "VeMo (human-opt view)": 0.13312693498452013,
      "VeMo (max entropy view)": 0.40710382513661203,
      "VeMo (min entropy view)": 0.13312693498452013,
      "VeMo (random view)": 0.40710382513661203,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man is crab - walking backwards and then rolls to the left."
  },
  "002668": {
    "text": "a person has both hands on his legs.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5436165358029517,
      "Minus Multimodal Distance": -8.186014175415039,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.941175080195535e-05,
      "MoBERT-F": 0.4409546721113762,
      "MoBERT-N": 0.5266797897586935,
      "MoBERT-min(F/N)": 0.4409546721113762,
      "MoBERT-max(F/N)": 0.5266797897586935,
      "MotionCritic": -14.415226936340332,
      "VeMo (human-opt view)": 0.48370927318295737,
      "VeMo (max entropy view)": 0.48370927318295737,
      "VeMo (min entropy view)": 0.24493554327808473,
      "VeMo (random view)": 0.24493554327808473,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person has both hands on their legs."
  },
  "002718": {
    "text": "a person lunges forward bending their left knee and elbow.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4041638250344375,
      "Minus Multimodal Distance": -10.844526290893555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4159358872566372e-05,
      "MoBERT-F": 0.4043554092217484,
      "MoBERT-N": 0.4897685924902107,
      "MoBERT-min(F/N)": 0.4043554092217484,
      "MoBERT-max(F/N)": 0.4897685924902107,
      "MotionCritic": -6.390841484069824,
      "VeMo (human-opt view)": 0.7316017316017316,
      "VeMo (max entropy view)": 0.7316017316017316,
      "VeMo (min entropy view)": 0.7431906614785992,
      "VeMo (random view)": 0.7316017316017316,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lunges forward, bending their left knee and elbow."
  },
  "002733": {
    "text": "cheerfully walking forward with each step.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3350291884432193,
      "Minus Multimodal Distance": -9.898993492126465,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.35150954127311707,
      "MoBERT-F": 0.7893334947820636,
      "MoBERT-N": 0.6876615704299756,
      "MoBERT-min(F/N)": 0.6876615704299756,
      "MoBERT-max(F/N)": 0.7893334947820636,
      "MotionCritic": -12.28531265258789,
      "VeMo (human-opt view)": 0.7658536585365854,
      "VeMo (max entropy view)": 0.7312883435582822,
      "VeMo (min entropy view)": 0.7658536585365854,
      "VeMo (random view)": 0.7312883435582822,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Cheerfully walking forward with each step."
  },
  "002741": {
    "text": "the man performed a tennis smash that won the match.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0566331582196293,
      "Minus Multimodal Distance": -9.54555606842041,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.01958930864930153,
      "MoBERT-F": 0.606840090406347,
      "MoBERT-N": 0.5256005278892298,
      "MoBERT-min(F/N)": 0.5256005278892298,
      "MoBERT-max(F/N)": 0.606840090406347,
      "MotionCritic": -6.241259574890137,
      "VeMo (human-opt view)": 0.25654450261780104,
      "VeMo (max entropy view)": 0.2690124858115778,
      "VeMo (min entropy view)": 0.25654450261780104,
      "VeMo (random view)": 0.2690124858115778,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man performed a tennis smash that won the match."
  },
  "002749": {
    "text": "person is waving with their right hand",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4612911902137581,
      "Minus Multimodal Distance": -9.428899765014648,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.405179475317709e-05,
      "MoBERT-F": 0.34183272589576724,
      "MoBERT-N": 0.45568362258311423,
      "MoBERT-min(F/N)": 0.34183272589576724,
      "MoBERT-max(F/N)": 0.45568362258311423,
      "MotionCritic": -13.606996536254883,
      "VeMo (human-opt view)": 0.9883105697059186,
      "VeMo (max entropy view)": 0.9850860420650096,
      "VeMo (min entropy view)": 0.9883105697059186,
      "VeMo (random view)": 0.9850860420650096,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is waving with their right hand."
  },
  "002750": {
    "text": "a man supports himself with his right hand, carefully going down to his knees.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0850952610288944,
      "Minus Multimodal Distance": -6.6406121253967285,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.795687578327488e-05,
      "MoBERT-F": 0.39504714774297556,
      "MoBERT-N": 0.49314309658230515,
      "MoBERT-min(F/N)": 0.39504714774297556,
      "MoBERT-max(F/N)": 0.49314309658230515,
      "MotionCritic": -6.640923976898193,
      "VeMo (human-opt view)": 0.9096744612563045,
      "VeMo (max entropy view)": 0.7671232876712328,
      "VeMo (min entropy view)": 0.9096744612563045,
      "VeMo (random view)": 0.9096744612563045,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man supports himself with his right hand and carefully goes down to his knees."
  },
  "002754": {
    "text": "a man does a dance, shuffles backward, and dances again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7604852464382268,
      "Minus Multimodal Distance": -14.647933959960938,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7750644221669063e-05,
      "MoBERT-F": 0.40004058929442604,
      "MoBERT-N": 0.5625737730009308,
      "MoBERT-min(F/N)": 0.40004058929442604,
      "MoBERT-max(F/N)": 0.5625737730009308,
      "MotionCritic": -5.78358793258667,
      "VeMo (human-opt view)": 0.0031623165148440082,
      "VeMo (max entropy view)": 0.0043316454151668595,
      "VeMo (min entropy view)": 0.0031623165148440082,
      "VeMo (random view)": 0.0031623165148440082,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man does a dance, shuffles backward, and dances again."
  },
  "002755": {
    "text": "a person jogs on the spot, then stands still",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0864783479444118,
      "Minus Multimodal Distance": -11.67300796508789,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.521031638025306e-05,
      "MoBERT-F": 0.43047174194938437,
      "MoBERT-N": 0.6003458452645575,
      "MoBERT-min(F/N)": 0.43047174194938437,
      "MoBERT-max(F/N)": 0.6003458452645575,
      "MotionCritic": -5.910274028778076,
      "VeMo (human-opt view)": 0.293598233995585,
      "VeMo (max entropy view)": 0.293598233995585,
      "VeMo (min entropy view)": 0.16411378555798686,
      "VeMo (random view)": 0.16411378555798686,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person jogs on the spot and then stands still."
  },
  "002761": {
    "text": "a person walks turning to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6853922610657018,
      "Minus Multimodal Distance": -8.288652420043945,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.328326809220016e-05,
      "MoBERT-F": 0.4080988837642297,
      "MoBERT-N": 0.5800103038934707,
      "MoBERT-min(F/N)": 0.4080988837642297,
      "MoBERT-max(F/N)": 0.5800103038934707,
      "MotionCritic": -9.932668685913086,
      "VeMo (human-opt view)": 0.9433419614610297,
      "VeMo (max entropy view)": 0.9433419614610297,
      "VeMo (min entropy view)": 0.9796727813584531,
      "VeMo (random view)": 0.9796727813584531,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks while turning to the left."
  },
  "002781": {
    "text": "a man walks around in a complete circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2464335808934481,
      "Minus Multimodal Distance": -11.332776069641113,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.329861672478728e-05,
      "MoBERT-F": 0.4185223831390977,
      "MoBERT-N": 0.44419017110225123,
      "MoBERT-min(F/N)": 0.4185223831390977,
      "MoBERT-max(F/N)": 0.44419017110225123,
      "MotionCritic": -7.897068500518799,
      "VeMo (human-opt view)": 0.8267782426778243,
      "VeMo (max entropy view)": 0.8264984227129337,
      "VeMo (min entropy view)": 0.8267782426778243,
      "VeMo (random view)": 0.8264984227129337,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks around in a complete circle."
  },
  "002789": {
    "text": "a man walks slowly forwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.72445022507775,
      "Minus Multimodal Distance": -10.43435001373291,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.741267962846905e-05,
      "MoBERT-F": 0.45657405550044033,
      "MoBERT-N": 0.5708166448331721,
      "MoBERT-min(F/N)": 0.45657405550044033,
      "MoBERT-max(F/N)": 0.5708166448331721,
      "MotionCritic": -9.122029304504395,
      "VeMo (human-opt view)": 0.9796893667861409,
      "VeMo (max entropy view)": 0.9285330467490597,
      "VeMo (min entropy view)": 0.9796893667861409,
      "VeMo (random view)": 0.9285330467490597,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks slowly forward."
  },
  "002795": {
    "text": "a person jogging in place.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2633697481969086,
      "Minus Multimodal Distance": -7.500395774841309,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3092914489097893e-05,
      "MoBERT-F": 0.3726960650647979,
      "MoBERT-N": 0.5032470760139898,
      "MoBERT-min(F/N)": 0.3726960650647979,
      "MoBERT-max(F/N)": 0.5032470760139898,
      "MotionCritic": -4.466920375823975,
      "VeMo (human-opt view)": 0.3632,
      "VeMo (max entropy view)": 0.3632,
      "VeMo (min entropy view)": 0.7769347496206374,
      "VeMo (random view)": 0.3632,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is jogging in place."
  },
  "002798": {
    "text": "a person is cheering and dancing",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0341484721952137,
      "Minus Multimodal Distance": -12.03366470336914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5101266503334045,
      "MoBERT-F": 0.7108301707211684,
      "MoBERT-N": 0.6519181218813532,
      "MoBERT-min(F/N)": 0.6519181218813532,
      "MoBERT-max(F/N)": 0.7108301707211684,
      "MotionCritic": -6.097849369049072,
      "VeMo (human-opt view)": 0.7046413502109705,
      "VeMo (max entropy view)": 0.4376731301939058,
      "VeMo (min entropy view)": 0.7046413502109705,
      "VeMo (random view)": 0.4376731301939058,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is cheering and dancing."
  },
  "002799": {
    "text": "a person stretching their right arm",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6901219276680549,
      "Minus Multimodal Distance": -6.350381851196289,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4165095965145156e-05,
      "MoBERT-F": 0.38342502778475457,
      "MoBERT-N": 0.49848867208782277,
      "MoBERT-min(F/N)": 0.38342502778475457,
      "MoBERT-max(F/N)": 0.49848867208782277,
      "MotionCritic": -7.3285956382751465,
      "VeMo (human-opt view)": 0.9841162554917202,
      "VeMo (max entropy view)": 0.9841162554917202,
      "VeMo (min entropy view)": 0.989040758527275,
      "VeMo (random view)": 0.9841162554917202,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is stretching their right arm."
  },
  "002842": {
    "text": "a person walks forward 4 steps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9029479993137993,
      "Minus Multimodal Distance": -2.9369006156921387,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.9196347643155605e-05,
      "MoBERT-F": 0.6043151160300291,
      "MoBERT-N": 0.7033255094172486,
      "MoBERT-min(F/N)": 0.6043151160300291,
      "MoBERT-max(F/N)": 0.7033255094172486,
      "MotionCritic": -7.741724491119385,
      "VeMo (human-opt view)": 0.9498164014687882,
      "VeMo (max entropy view)": 0.9399744572158365,
      "VeMo (min entropy view)": 0.9498164014687882,
      "VeMo (random view)": 0.9399744572158365,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward 4 steps."
  },
  "002868": {
    "text": "a person does a drumming movement with both hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4881707873427125,
      "Minus Multimodal Distance": -7.393267631530762,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.06635813415050507,
      "MoBERT-F": 0.5676141362357877,
      "MoBERT-N": 0.5252773767184048,
      "MoBERT-min(F/N)": 0.5252773767184048,
      "MoBERT-max(F/N)": 0.5676141362357877,
      "MotionCritic": -7.943915843963623,
      "VeMo (human-opt view)": 0.8442776735459663,
      "VeMo (max entropy view)": 0.8442776735459663,
      "VeMo (min entropy view)": 0.06356589147286822,
      "VeMo (random view)": 0.06356589147286822,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person makes a drumming movement with both hands."
  },
  "002870": {
    "text": "a person walks from side to side with their arms hanging loosely.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5779403283981608,
      "Minus Multimodal Distance": -6.685866832733154,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4815049982862547e-05,
      "MoBERT-F": 0.45646564303326964,
      "MoBERT-N": 0.5832405911361003,
      "MoBERT-min(F/N)": 0.45646564303326964,
      "MoBERT-max(F/N)": 0.5832405911361003,
      "MotionCritic": -4.081237316131592,
      "VeMo (human-opt view)": 0.9101034208432777,
      "VeMo (max entropy view)": 0.9101034208432777,
      "VeMo (min entropy view)": 0.9150661545027742,
      "VeMo (random view)": 0.9101034208432777,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks from side to side with their arms hanging loosely."
  },
  "002876": {
    "text": "the person is moving something around.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1779603841329773,
      "Minus Multimodal Distance": -12.017395973205566,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.1998910754919052,
      "MoBERT-F": 0.7288699596972269,
      "MoBERT-N": 0.6491642483941413,
      "MoBERT-min(F/N)": 0.6491642483941413,
      "MoBERT-max(F/N)": 0.7288699596972269,
      "MotionCritic": -0.8887683749198914,
      "VeMo (human-opt view)": 0.6654478976234004,
      "VeMo (max entropy view)": 0.6215538847117794,
      "VeMo (min entropy view)": 0.6654478976234004,
      "VeMo (random view)": 0.6654478976234004,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is moving something around."
  },
  "002888": {
    "text": "a person walking down a stage.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2461740284009686,
      "Minus Multimodal Distance": -8.60546875,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.417708128632512e-05,
      "MoBERT-F": 0.4089803974234388,
      "MoBERT-N": 0.5466896575904436,
      "MoBERT-min(F/N)": 0.4089803974234388,
      "MoBERT-max(F/N)": 0.5466896575904436,
      "MotionCritic": -9.57894229888916,
      "VeMo (human-opt view)": 0.967062818336163,
      "VeMo (max entropy view)": 0.960256068284876,
      "VeMo (min entropy view)": 0.967062818336163,
      "VeMo (random view)": 0.967062818336163,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking down a stage."
  },
  "002897": {
    "text": "a person jauntily skips forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7512214247407458,
      "Minus Multimodal Distance": -8.959165573120117,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9621177911758423,
      "MoBERT-F": 0.9528172269665736,
      "MoBERT-N": 0.8472385746621397,
      "MoBERT-min(F/N)": 0.8472385746621397,
      "MoBERT-max(F/N)": 0.9528172269665736,
      "MotionCritic": -12.397738456726074,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.23414071510957324,
      "VeMo (random view)": 0.23414071510957324,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person jauntily skips forward."
  },
  "002899": {
    "text": "a person is leaning forward and making rapid movements with their right arm.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1289256345008019,
      "Minus Multimodal Distance": -9.310040473937988,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6181178327533416e-05,
      "MoBERT-F": 0.35427208990810666,
      "MoBERT-N": 0.4139268543052145,
      "MoBERT-min(F/N)": 0.35427208990810666,
      "MoBERT-max(F/N)": 0.4139268543052145,
      "MotionCritic": -3.634279251098633,
      "VeMo (human-opt view)": 0.9362808842652796,
      "VeMo (max entropy view)": 0.9098966026587888,
      "VeMo (min entropy view)": 0.9362808842652796,
      "VeMo (random view)": 0.9362808842652796,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is leaning forward and making rapid movements with their right arm."
  },
  "002917": {
    "text": "a man moves his hand up in the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2378344625355433,
      "Minus Multimodal Distance": -6.218223571777344,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.393414201331325e-05,
      "MoBERT-F": 0.5264927977200443,
      "MoBERT-N": 0.6220702646758799,
      "MoBERT-min(F/N)": 0.5264927977200443,
      "MoBERT-max(F/N)": 0.6220702646758799,
      "MotionCritic": -5.670770168304443,
      "VeMo (human-opt view)": 0.9890706578489384,
      "VeMo (max entropy view)": 0.9890706578489384,
      "VeMo (min entropy view)": 0.9928752011031947,
      "VeMo (random view)": 0.9890706578489384,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man moves his hand up in the air."
  },
  "002922": {
    "text": "someone completes a full jumping jack before stopping.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.335555339880993,
      "Minus Multimodal Distance": -7.044375419616699,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8880276679992676,
      "MoBERT-F": 0.8006159276630633,
      "MoBERT-N": 0.7793320861725299,
      "MoBERT-min(F/N)": 0.7793320861725299,
      "MoBERT-max(F/N)": 0.8006159276630633,
      "MotionCritic": -8.803503036499023,
      "VeMo (human-opt view)": 0.8261758691206544,
      "VeMo (max entropy view)": 0.8261758691206544,
      "VeMo (min entropy view)": 0.9049773755656109,
      "VeMo (random view)": 0.9049773755656109,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone completes a full jumping jack before stopping."
  },
  "002928": {
    "text": "a person stumbling to their right side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2471365333479378,
      "Minus Multimodal Distance": -9.713275909423828,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.356929144298192e-05,
      "MoBERT-F": 0.48953349537552054,
      "MoBERT-N": 0.5934214263101547,
      "MoBERT-min(F/N)": 0.48953349537552054,
      "MoBERT-max(F/N)": 0.5934214263101547,
      "MotionCritic": -10.930450439453125,
      "VeMo (human-opt view)": 0.9197860962566845,
      "VeMo (max entropy view)": 0.8672897196261682,
      "VeMo (min entropy view)": 0.9197860962566845,
      "VeMo (random view)": 0.8672897196261682,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is stumbling to their right side."
  },
  "002931": {
    "text": "a person grab with hands something and carries over to the other place",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4399890782149052,
      "Minus Multimodal Distance": -2.8564164638519287,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0027002128772437572,
      "MoBERT-F": 0.6213370538954075,
      "MoBERT-N": 0.42641782082201735,
      "MoBERT-min(F/N)": 0.42641782082201735,
      "MoBERT-max(F/N)": 0.6213370538954075,
      "MotionCritic": -10.260122299194336,
      "VeMo (human-opt view)": 1.9010588898016193e-05,
      "VeMo (max entropy view)": 1.9010588898016193e-05,
      "VeMo (min entropy view)": 1.780160367018206e-05,
      "VeMo (random view)": 1.780160367018206e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person grabs something with their hands and carries it to another place."
  },
  "002932": {
    "text": "the person does 2 cartwheels",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.6416627348272974,
      "Minus Multimodal Distance": -10.191045761108398,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3853457605582662e-05,
      "MoBERT-F": 0.36343217867809785,
      "MoBERT-N": 0.45430817984240096,
      "MoBERT-min(F/N)": 0.36343217867809785,
      "MoBERT-max(F/N)": 0.45430817984240096,
      "MotionCritic": -9.218957901000977,
      "VeMo (human-opt view)": 0.9147788565264293,
      "VeMo (max entropy view)": 0.9147788565264293,
      "VeMo (min entropy view)": 0.9149828440716736,
      "VeMo (random view)": 0.9149828440716736,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person does 2 cartwheels."
  },
  "002950": {
    "text": "a person hops forward with both legs and after a few hops they hop on top of something then back down right after.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.1692613325913346,
      "Minus Multimodal Distance": -5.393826007843018,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.06302785652224e-05,
      "MoBERT-F": 0.5426583842503316,
      "MoBERT-N": 0.618612659155583,
      "MoBERT-min(F/N)": 0.5426583842503316,
      "MoBERT-max(F/N)": 0.618612659155583,
      "MotionCritic": -11.208724975585938,
      "VeMo (human-opt view)": 0.0002459726705442051,
      "VeMo (max entropy view)": 0.0002459726705442051,
      "VeMo (min entropy view)": 0.00022914072229140723,
      "VeMo (random view)": 0.0002459726705442051,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person hops forward with both legs. After a few hops, they hop on top of something and then immediately hop back down."
  },
  "002954": {
    "text": "walks in a tight circle the runs back and walks in a tight circle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7993788609627517,
      "Minus Multimodal Distance": -9.865091323852539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.326306978124194e-05,
      "MoBERT-F": 0.4518127555455699,
      "MoBERT-N": 0.5064336007078012,
      "MoBERT-min(F/N)": 0.4518127555455699,
      "MoBERT-max(F/N)": 0.5064336007078012,
      "MotionCritic": -11.346582412719727,
      "VeMo (human-opt view)": 0.7182795698924731,
      "VeMo (max entropy view)": 0.6506024096385542,
      "VeMo (min entropy view)": 0.7182795698924731,
      "VeMo (random view)": 0.6506024096385542,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in a tight circle, then runs back and walks in a tight circle."
  },
  "002976": {
    "text": "a person walks forward and turns clockwise, then grabs their knee in pain before continuing ahead and turning clockwise again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2857136061837489,
      "Minus Multimodal Distance": -11.036967277526855,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.116902007604949e-05,
      "MoBERT-F": 0.4790689850478413,
      "MoBERT-N": 0.5087971251599503,
      "MoBERT-min(F/N)": 0.4790689850478413,
      "MoBERT-max(F/N)": 0.5087971251599503,
      "MotionCritic": -17.707035064697266,
      "VeMo (human-opt view)": 0.7310704960835509,
      "VeMo (max entropy view)": 0.7310704960835509,
      "VeMo (min entropy view)": 0.7436918990703851,
      "VeMo (random view)": 0.7310704960835509,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and turns clockwise. Then, they grab their knee in pain before continuing ahead and turning clockwise again."
  },
  "003005": {
    "text": "he does a salsa dance",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5080847719180923,
      "Minus Multimodal Distance": -14.82625961303711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001240437850356102,
      "MoBERT-F": 0.5458233625963028,
      "MoBERT-N": 0.6044111613952869,
      "MoBERT-min(F/N)": 0.5458233625963028,
      "MoBERT-max(F/N)": 0.6044111613952869,
      "MotionCritic": -10.70515251159668,
      "VeMo (human-opt view)": 0.5307443365695793,
      "VeMo (max entropy view)": 0.5307443365695793,
      "VeMo (min entropy view)": 0.5310880829015544,
      "VeMo (random view)": 0.5310880829015544,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He does a salsa dance."
  },
  "003020": {
    "text": "a man slowly sways from side to side, sightly bending his knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2392650388432098,
      "Minus Multimodal Distance": -8.766599655151367,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3554661311209202e-05,
      "MoBERT-F": 0.39998237134824915,
      "MoBERT-N": 0.45975271362605935,
      "MoBERT-min(F/N)": 0.39998237134824915,
      "MoBERT-max(F/N)": 0.45975271362605935,
      "MotionCritic": -5.3551764488220215,
      "VeMo (human-opt view)": 0.7774294670846394,
      "VeMo (max entropy view)": 0.7306122448979592,
      "VeMo (min entropy view)": 0.7774294670846394,
      "VeMo (random view)": 0.7774294670846394,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man slowly sways from side to side, slightly bending his knees."
  },
  "003027": {
    "text": "the person makes a right turn.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8965192879021951,
      "Minus Multimodal Distance": -3.1490306854248047,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2716892999596894e-05,
      "MoBERT-F": 0.4736071621636625,
      "MoBERT-N": 0.5676887331414532,
      "MoBERT-min(F/N)": 0.4736071621636625,
      "MoBERT-max(F/N)": 0.5676887331414532,
      "MotionCritic": -4.565657138824463,
      "VeMo (human-opt view)": 0.9466972711043101,
      "VeMo (max entropy view)": 0.9466300701433363,
      "VeMo (min entropy view)": 0.9466972711043101,
      "VeMo (random view)": 0.9466300701433363,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person makes a right turn."
  },
  "003062": {
    "text": "a person is standing still while waving his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4104281205814451,
      "Minus Multimodal Distance": -14.383384704589844,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.5607525205705315e-05,
      "MoBERT-F": 0.351036787358034,
      "MoBERT-N": 0.4225049997903091,
      "MoBERT-min(F/N)": 0.351036787358034,
      "MoBERT-max(F/N)": 0.4225049997903091,
      "MotionCritic": -10.608633041381836,
      "VeMo (human-opt view)": 0.9819967266775778,
      "VeMo (max entropy view)": 0.9626461362988309,
      "VeMo (min entropy view)": 0.9819967266775778,
      "VeMo (random view)": 0.9819967266775778,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing still while waving his right hand."
  },
  "003082": {
    "text": "standing forward, hands are at the side moving toward sides of the head and back down while the legs are slightly jumping outwards. creating a jumping jack.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5526228808524059,
      "Minus Multimodal Distance": -8.54318618774414,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.11500946432352066,
      "MoBERT-F": 0.7898600268782999,
      "MoBERT-N": 0.6792763830345174,
      "MoBERT-min(F/N)": 0.6792763830345174,
      "MoBERT-max(F/N)": 0.7898600268782999,
      "MotionCritic": -7.0410308837890625,
      "VeMo (human-opt view)": 0.9048569527611444,
      "VeMo (max entropy view)": 0.8808172531214529,
      "VeMo (min entropy view)": 0.9048569527611444,
      "VeMo (random view)": 0.9048569527611444,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing forward, hands are at the sides, moving toward the sides of the head and back down while the legs are slightly jumping outwards, creating a jumping jack."
  },
  "003083": {
    "text": "the person starts by sitting and dials with their right hand then holds the phone with their left and starts by turning in their chair, before standing up walking to the left around the back of the chair then sitting down again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.382136641408181,
      "Minus Multimodal Distance": -5.108242988586426,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5752926376299e-05,
      "MoBERT-F": 0.21682928857496248,
      "MoBERT-N": 0.3681565640182797,
      "MoBERT-min(F/N)": 0.21682928857496248,
      "MoBERT-max(F/N)": 0.3681565640182797,
      "MotionCritic": -9.334360122680664,
      "VeMo (human-opt view)": 2.355687378869484e-05,
      "VeMo (max entropy view)": 0.00029599779819181694,
      "VeMo (min entropy view)": 2.355687378869484e-05,
      "VeMo (random view)": 2.355687378869484e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person starts by sitting and dials with their right hand. Then, they hold the phone with their left hand and start turning in their chair, before standing up, walking to the left around the back of the chair, and then sitting down again."
  },
  "003105": {
    "text": "a person standing up and greeting someone with a respectful hand to forehead gesture.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.831960165918493,
      "Minus Multimodal Distance": -5.1578755378723145,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4810815375531092e-05,
      "MoBERT-F": 0.4473919808162016,
      "MoBERT-N": 0.509856064842279,
      "MoBERT-min(F/N)": 0.4473919808162016,
      "MoBERT-max(F/N)": 0.509856064842279,
      "MotionCritic": -6.299403190612793,
      "VeMo (human-opt view)": 0.9579563597658329,
      "VeMo (max entropy view)": 0.9552955295529553,
      "VeMo (min entropy view)": 0.9579563597658329,
      "VeMo (random view)": 0.9579563597658329,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands up and greets someone with a respectful hand-to-forehead gesture."
  },
  "003108": {
    "text": "a person with a wide stance repeatedly lash out with each hand at something in front of them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9521172974416529,
      "Minus Multimodal Distance": -3.180555582046509,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.7114361475687474e-05,
      "MoBERT-F": 0.4849702313276568,
      "MoBERT-N": 0.5015672917829943,
      "MoBERT-min(F/N)": 0.4849702313276568,
      "MoBERT-max(F/N)": 0.5015672917829943,
      "MotionCritic": -3.001612663269043,
      "VeMo (human-opt view)": 0.530214424951267,
      "VeMo (max entropy view)": 0.530214424951267,
      "VeMo (min entropy view)": 0.8444040036396724,
      "VeMo (random view)": 0.8444040036396724,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person with a wide stance repeatedly lashes out with each hand at something in front of them."
  },
  "003116": {
    "text": "person walks slightly to the right",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8093392458590244,
      "Minus Multimodal Distance": -4.705110549926758,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3369981136056595e-05,
      "MoBERT-F": 0.46296969610630134,
      "MoBERT-N": 0.6026628011237003,
      "MoBERT-min(F/N)": 0.46296969610630134,
      "MoBERT-max(F/N)": 0.6026628011237003,
      "MotionCritic": -5.593653202056885,
      "VeMo (human-opt view)": 0.9688737973967176,
      "VeMo (max entropy view)": 0.9603751465416178,
      "VeMo (min entropy view)": 0.9688737973967176,
      "VeMo (random view)": 0.9603751465416178,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks slightly to the right."
  },
  "003119": {
    "text": "a person standing and acting like a chicken.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6643208820959356,
      "Minus Multimodal Distance": -11.897354125976562,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.000155275542056188,
      "MoBERT-F": 0.5820888810349019,
      "MoBERT-N": 0.5400727780927506,
      "MoBERT-min(F/N)": 0.5400727780927506,
      "MoBERT-max(F/N)": 0.5820888810349019,
      "MotionCritic": -6.607437610626221,
      "VeMo (human-opt view)": 0.04197852261633583,
      "VeMo (max entropy view)": 0.2567049808429119,
      "VeMo (min entropy view)": 0.04197852261633583,
      "VeMo (random view)": 0.2567049808429119,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing and acting like a chicken."
  },
  "003138": {
    "text": "a person opens a door and appears to be swimming.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7777624862954949,
      "Minus Multimodal Distance": -10.395934104919434,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5737606847542338e-05,
      "MoBERT-F": 0.49121900150173003,
      "MoBERT-N": 0.5451585410363629,
      "MoBERT-min(F/N)": 0.49121900150173003,
      "MoBERT-max(F/N)": 0.5451585410363629,
      "MotionCritic": -3.2735133171081543,
      "VeMo (human-opt view)": 3.5374846601456505e-05,
      "VeMo (max entropy view)": 0.0002867094721458072,
      "VeMo (min entropy view)": 3.5374846601456505e-05,
      "VeMo (random view)": 0.0002867094721458072,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person opens a door and appears to be swimming."
  },
  "003142": {
    "text": "a person crawls on hands and knees forward, stands up with side showing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.747114026706183,
      "Minus Multimodal Distance": -8.689887046813965,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3004840841167606e-05,
      "MoBERT-F": 0.4798100829754416,
      "MoBERT-N": 0.4929937926818349,
      "MoBERT-min(F/N)": 0.4798100829754416,
      "MoBERT-max(F/N)": 0.4929937926818349,
      "MotionCritic": -7.233882427215576,
      "VeMo (human-opt view)": 0.9986723798377912,
      "VeMo (max entropy view)": 0.9976751374087861,
      "VeMo (min entropy view)": 0.9986723798377912,
      "VeMo (random view)": 0.9986723798377912,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person crawls forward on hands and knees, then stands up with their side showing."
  },
  "003149": {
    "text": "a person standing on one foot holds their right hand up while moving their left foot in a side to side motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0054378476646786,
      "Minus Multimodal Distance": -8.125114440917969,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3907157810754143e-05,
      "MoBERT-F": 0.46067290090175383,
      "MoBERT-N": 0.5570280792226696,
      "MoBERT-min(F/N)": 0.46067290090175383,
      "MoBERT-max(F/N)": 0.5570280792226696,
      "MotionCritic": -2.6460044384002686,
      "VeMo (human-opt view)": 0.9146486701071854,
      "VeMo (max entropy view)": 0.8872638634978671,
      "VeMo (min entropy view)": 0.9146486701071854,
      "VeMo (random view)": 0.9146486701071854,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person standing on one foot holds their right hand up while moving their left foot in a side-to-side motion."
  },
  "003171": {
    "text": "the person is walking down arms out.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9328252147156993,
      "Minus Multimodal Distance": -10.97420883178711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0002625775814522058,
      "MoBERT-F": 0.6160265862826056,
      "MoBERT-N": 0.5572906381992395,
      "MoBERT-min(F/N)": 0.5572906381992395,
      "MoBERT-max(F/N)": 0.6160265862826056,
      "MotionCritic": -7.4404826164245605,
      "VeMo (human-opt view)": 0.9360936093609361,
      "VeMo (max entropy view)": 0.9101251422070534,
      "VeMo (min entropy view)": 0.9360936093609361,
      "VeMo (random view)": 0.9360936093609361,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking down with arms out."
  },
  "003174": {
    "text": "a person is walking across a narrow beam.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.247573526635554,
      "Minus Multimodal Distance": -10.407868385314941,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.160919004585594e-05,
      "MoBERT-F": 0.576339153465228,
      "MoBERT-N": 0.5968984713257338,
      "MoBERT-min(F/N)": 0.576339153465228,
      "MoBERT-max(F/N)": 0.5968984713257338,
      "MotionCritic": -10.984489440917969,
      "VeMo (human-opt view)": 0.004067197170645446,
      "VeMo (max entropy view)": 0.004080503492634346,
      "VeMo (min entropy view)": 0.004067197170645446,
      "VeMo (random view)": 0.004067197170645446,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking across a narrow beam."
  },
  "003191": {
    "text": "a person lobs tennis balls with their right arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1043868058432835,
      "Minus Multimodal Distance": -8.94444465637207,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0024572028778493404,
      "MoBERT-F": 0.5991921533465633,
      "MoBERT-N": 0.436285593850564,
      "MoBERT-min(F/N)": 0.436285593850564,
      "MoBERT-max(F/N)": 0.5991921533465633,
      "MotionCritic": -3.598928928375244,
      "VeMo (human-opt view)": 0.3920595533498759,
      "VeMo (max entropy view)": 0.3920595533498759,
      "VeMo (min entropy view)": 0.2328159645232816,
      "VeMo (random view)": 0.3920595533498759,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lobs tennis balls with their right arm."
  },
  "003193": {
    "text": "a person runs back and forth",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.1191259813797974,
      "Minus Multimodal Distance": -2.8518924713134766,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0023454297333955765,
      "MoBERT-F": 0.48519376246035295,
      "MoBERT-N": 0.5246399066462918,
      "MoBERT-min(F/N)": 0.48519376246035295,
      "MoBERT-max(F/N)": 0.5246399066462918,
      "MotionCritic": -3.914461135864258,
      "VeMo (human-opt view)": 0.964964964964965,
      "VeMo (max entropy view)": 0.964964964964965,
      "VeMo (min entropy view)": 0.9859426150587329,
      "VeMo (random view)": 0.9859426150587329,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs back and forth."
  },
  "003195": {
    "text": "a person is standing and steps backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5102029999783083,
      "Minus Multimodal Distance": -11.889232635498047,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5693688988685608,
      "MoBERT-F": 0.5940182363171926,
      "MoBERT-N": 0.5448272459937357,
      "MoBERT-min(F/N)": 0.5448272459937357,
      "MoBERT-max(F/N)": 0.5940182363171926,
      "MotionCritic": -8.285846710205078,
      "VeMo (human-opt view)": 0.6655574043261231,
      "VeMo (max entropy view)": 0.6514886164623468,
      "VeMo (min entropy view)": 0.6655574043261231,
      "VeMo (random view)": 0.6655574043261231,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing and steps backward."
  },
  "003227": {
    "text": "a person walks back and forth from right to left, stops, and stands in one place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.7235849755852808,
      "Minus Multimodal Distance": -7.344095230102539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.023970462381839752,
      "MoBERT-F": 0.6628146025366456,
      "MoBERT-N": 0.6430719191666765,
      "MoBERT-min(F/N)": 0.6430719191666765,
      "MoBERT-max(F/N)": 0.6628146025366456,
      "MotionCritic": -9.604569435119629,
      "VeMo (human-opt view)": 0.9796954314720813,
      "VeMo (max entropy view)": 0.8929384965831435,
      "VeMo (min entropy view)": 0.9796954314720813,
      "VeMo (random view)": 0.8929384965831435,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks back and forth from right to left, stops, and stands in one place."
  },
  "003245": {
    "text": "a person raises their arms high above their head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9169350870681179,
      "Minus Multimodal Distance": -5.755463600158691,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0007513432065024972,
      "MoBERT-F": 0.5567499906449173,
      "MoBERT-N": 0.5646350207558019,
      "MoBERT-min(F/N)": 0.5567499906449173,
      "MoBERT-max(F/N)": 0.5646350207558019,
      "MotionCritic": -1.6741664409637451,
      "VeMo (human-opt view)": 0.9924491967769726,
      "VeMo (max entropy view)": 0.9924491967769726,
      "VeMo (min entropy view)": 0.9964162810015427,
      "VeMo (random view)": 0.9964162810015427,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises their arms high above their head."
  },
  "003278": {
    "text": "a person marches forward aggressively than turns around and does the same.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5790578294393225,
      "Minus Multimodal Distance": -10.066598892211914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.144098445773125e-05,
      "MoBERT-F": 0.575453736426291,
      "MoBERT-N": 0.6432851529425121,
      "MoBERT-min(F/N)": 0.575453736426291,
      "MoBERT-max(F/N)": 0.6432851529425121,
      "MotionCritic": -12.501426696777344,
      "VeMo (human-opt view)": 0.8175824175824176,
      "VeMo (max entropy view)": 0.7670639219934995,
      "VeMo (min entropy view)": 0.8175824175824176,
      "VeMo (random view)": 0.8175824175824176,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person marches forward aggressively, then turns around and does the same."
  },
  "003282": {
    "text": "the person is standing up being flexable",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0659069097466787,
      "Minus Multimodal Distance": -10.563716888427734,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.921316940453835e-05,
      "MoBERT-F": 0.5384504871002814,
      "MoBERT-N": 0.6412679919742407,
      "MoBERT-min(F/N)": 0.5384504871002814,
      "MoBERT-max(F/N)": 0.6412679919742407,
      "MotionCritic": -20.32284164428711,
      "VeMo (human-opt view)": 0.8600790513833992,
      "VeMo (max entropy view)": 0.8263473053892215,
      "VeMo (min entropy view)": 0.8600790513833992,
      "VeMo (random view)": 0.8263473053892215,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing up and being flexible."
  },
  "003363": {
    "text": "a person hammers a nail into a wall.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9137300115158185,
      "Minus Multimodal Distance": -3.262375593185425,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5856998036033474e-05,
      "MoBERT-F": 0.38169975621704855,
      "MoBERT-N": 0.499483855391879,
      "MoBERT-min(F/N)": 0.38169975621704855,
      "MoBERT-max(F/N)": 0.499483855391879,
      "MotionCritic": -5.335877895355225,
      "VeMo (human-opt view)": 3.7263802022188208e-06,
      "VeMo (max entropy view)": 0.004585556836770266,
      "VeMo (min entropy view)": 3.7263802022188208e-06,
      "VeMo (random view)": 0.004585556836770266,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person hammers a nail into a wall."
  },
  "003373": {
    "text": "the stick figure is walking in form of a back wards letter j.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5083140054852173,
      "Minus Multimodal Distance": -2.9169962406158447,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00017250888049602509,
      "MoBERT-F": 0.5097346519649921,
      "MoBERT-N": 0.5088088371806554,
      "MoBERT-min(F/N)": 0.5088088371806554,
      "MoBERT-max(F/N)": 0.5097346519649921,
      "MotionCritic": -9.371142387390137,
      "VeMo (human-opt view)": 0.8355263157894737,
      "VeMo (max entropy view)": 0.8355263157894737,
      "VeMo (min entropy view)": 0.8436363636363636,
      "VeMo (random view)": 0.8355263157894737,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The stick figure is walking in the form of a backwards letter J."
  },
  "003379": {
    "text": "the character scratches his head with his right arm",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.42697914059507175,
      "Minus Multimodal Distance": -7.387441635131836,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.362377901794389e-05,
      "MoBERT-F": 0.3179450623332911,
      "MoBERT-N": 0.4386125135894381,
      "MoBERT-min(F/N)": 0.3179450623332911,
      "MoBERT-max(F/N)": 0.4386125135894381,
      "MotionCritic": -13.783729553222656,
      "VeMo (human-opt view)": 0.9400868306801736,
      "VeMo (max entropy view)": 0.9400868306801736,
      "VeMo (min entropy view)": 0.9432206019236736,
      "VeMo (random view)": 0.9432206019236736,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The character scratches his head with his right arm."
  },
  "003430": {
    "text": "a person walks in anticlockwise direction dragging his left feet.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7226935369891887,
      "Minus Multimodal Distance": -11.634803771972656,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9400056004524231,
      "MoBERT-F": 0.7276271577324085,
      "MoBERT-N": 0.7429692718171261,
      "MoBERT-min(F/N)": 0.7276271577324085,
      "MoBERT-max(F/N)": 0.7429692718171261,
      "MotionCritic": -7.17611837387085,
      "VeMo (human-opt view)": 0.8077858880778589,
      "VeMo (max entropy view)": 0.7881944444444444,
      "VeMo (min entropy view)": 0.8077858880778589,
      "VeMo (random view)": 0.8077858880778589,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in an anticlockwise direction, dragging his left foot."
  },
  "003432": {
    "text": "a person sits on a ledge",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4336689989406597,
      "Minus Multimodal Distance": -6.283966064453125,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5229654056602158e-05,
      "MoBERT-F": 0.35634140052744406,
      "MoBERT-N": 0.4077814036324396,
      "MoBERT-min(F/N)": 0.35634140052744406,
      "MoBERT-max(F/N)": 0.4077814036324396,
      "MotionCritic": -14.04997444152832,
      "VeMo (human-opt view)": 0.6650366748166259,
      "VeMo (max entropy view)": 0.3482142857142857,
      "VeMo (min entropy view)": 0.6650366748166259,
      "VeMo (random view)": 0.3482142857142857,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits on a ledge."
  },
  "003440": {
    "text": "the person is moving from side to side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1047123584167204,
      "Minus Multimodal Distance": -8.86019515991211,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5521482825279236,
      "MoBERT-F": 0.7110257570969322,
      "MoBERT-N": 0.7308410139976197,
      "MoBERT-min(F/N)": 0.7110257570969322,
      "MoBERT-max(F/N)": 0.7308410139976197,
      "MotionCritic": -7.3943400382995605,
      "VeMo (human-opt view)": 0.8806366047745358,
      "VeMo (max entropy view)": 0.8438356164383561,
      "VeMo (min entropy view)": 0.8806366047745358,
      "VeMo (random view)": 0.8806366047745358,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is moving from side to side."
  },
  "003441": {
    "text": "a person appears to be raising both his arms with difficulty.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2207250053033083,
      "Minus Multimodal Distance": -6.393324851989746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.3525502405827865e-05,
      "MoBERT-F": 0.3708024512624612,
      "MoBERT-N": 0.424244483258213,
      "MoBERT-min(F/N)": 0.3708024512624612,
      "MoBERT-max(F/N)": 0.424244483258213,
      "MotionCritic": -6.443206310272217,
      "VeMo (human-opt view)": 0.9323583180987203,
      "VeMo (max entropy view)": 0.8990936555891239,
      "VeMo (min entropy view)": 0.9323583180987203,
      "VeMo (random view)": 0.8990936555891239,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person appears to be raising both his arms with difficulty."
  },
  "003481": {
    "text": "a person steps forward and kicks aggressively with their right leg, then quickly squats and jumps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.306518745748324,
      "Minus Multimodal Distance": -11.204944610595703,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.005632665473967791,
      "MoBERT-F": 0.6250055196437609,
      "MoBERT-N": 0.5898878327891176,
      "MoBERT-min(F/N)": 0.5898878327891176,
      "MoBERT-max(F/N)": 0.6250055196437609,
      "MotionCritic": -4.9772138595581055,
      "VeMo (human-opt view)": 0.8866666666666667,
      "VeMo (max entropy view)": 0.8519758874748827,
      "VeMo (min entropy view)": 0.8866666666666667,
      "VeMo (random view)": 0.8866666666666667,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person steps forward and kicks aggressively with their right leg, then quickly squats and jumps."
  },
  "003483": {
    "text": "a man stands still and then starts dancing around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2570920035086715,
      "Minus Multimodal Distance": -8.049981117248535,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.123670027591288e-05,
      "MoBERT-F": 0.5336093626955931,
      "MoBERT-N": 0.6390767595560458,
      "MoBERT-min(F/N)": 0.5336093626955931,
      "MoBERT-max(F/N)": 0.6390767595560458,
      "MotionCritic": -10.25429916381836,
      "VeMo (human-opt view)": 0.5938375350140056,
      "VeMo (max entropy view)": 0.5938375350140056,
      "VeMo (min entropy view)": 0.765625,
      "VeMo (random view)": 0.5938375350140056,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man stands still and then starts dancing around."
  },
  "003539": {
    "text": "a person is pushed by their left arm while walking forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.284266931531838,
      "Minus Multimodal Distance": -6.93511962890625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5510937120998278e-05,
      "MoBERT-F": 0.4916805360840846,
      "MoBERT-N": 0.42705093078608686,
      "MoBERT-min(F/N)": 0.42705093078608686,
      "MoBERT-max(F/N)": 0.4916805360840846,
      "MotionCritic": -13.434518814086914,
      "VeMo (human-opt view)": 0.7063829787234043,
      "VeMo (max entropy view)": 0.7063829787234043,
      "VeMo (min entropy view)": 0.7304347826086957,
      "VeMo (random view)": 0.7304347826086957,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is pushed by their left arm while walking forward."
  },
  "003547": {
    "text": "a person tries to screw something large into place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9117289149945027,
      "Minus Multimodal Distance": -7.889179229736328,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3781128525733948,
      "MoBERT-F": 0.684482810716482,
      "MoBERT-N": 0.6280704535851961,
      "MoBERT-min(F/N)": 0.6280704535851961,
      "MoBERT-max(F/N)": 0.684482810716482,
      "MotionCritic": -7.738595485687256,
      "VeMo (human-opt view)": 0.0003574865942527155,
      "VeMo (max entropy view)": 0.14014752370916755,
      "VeMo (min entropy view)": 0.0003574865942527155,
      "VeMo (random view)": 0.14014752370916755,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person tries to screw something large into place."
  },
  "003566": {
    "text": "person quickly walks in a  clockwise position with shoulders facing back happy",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.385260927564375,
      "Minus Multimodal Distance": -11.673861503601074,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.81237098533893e-05,
      "MoBERT-F": 0.5022565145146584,
      "MoBERT-N": 0.5596102381236114,
      "MoBERT-min(F/N)": 0.5022565145146584,
      "MoBERT-max(F/N)": 0.5596102381236114,
      "MotionCritic": -9.758070945739746,
      "VeMo (human-opt view)": 0.11285061969993476,
      "VeMo (max entropy view)": 0.19220549158547387,
      "VeMo (min entropy view)": 0.11285061969993476,
      "VeMo (random view)": 0.19220549158547387,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person quickly walks in a clockwise direction with their shoulders facing backward, looking happy."
  },
  "003575": {
    "text": "a person standing straight ,holding hands .",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6767380057089643,
      "Minus Multimodal Distance": -14.468620300292969,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0029275654815137386,
      "MoBERT-F": 0.5995062652558917,
      "MoBERT-N": 0.5637217782152409,
      "MoBERT-min(F/N)": 0.5637217782152409,
      "MoBERT-max(F/N)": 0.5995062652558917,
      "MotionCritic": -10.019938468933105,
      "VeMo (human-opt view)": 0.22258064516129034,
      "VeMo (max entropy view)": 0.28170594837261503,
      "VeMo (min entropy view)": 0.22258064516129034,
      "VeMo (random view)": 0.22258064516129034,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing straight, holding hands."
  },
  "003583": {
    "text": "a person waves both arms in the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7502539581689679,
      "Minus Multimodal Distance": -7.949959754943848,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.241805825382471e-05,
      "MoBERT-F": 0.3707585219998228,
      "MoBERT-N": 0.4712544635479221,
      "MoBERT-min(F/N)": 0.3707585219998228,
      "MoBERT-max(F/N)": 0.4712544635479221,
      "MotionCritic": -7.163859844207764,
      "VeMo (human-opt view)": 0.5466666666666666,
      "VeMo (max entropy view)": 0.5463917525773195,
      "VeMo (min entropy view)": 0.5466666666666666,
      "VeMo (random view)": 0.5463917525773195,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person waves both arms in the air."
  },
  "003584": {
    "text": "a person moves something out of the way in a effort to advance forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0222473260743559,
      "Minus Multimodal Distance": -5.616796970367432,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.000356960081262514,
      "MoBERT-F": 0.5649554834090098,
      "MoBERT-N": 0.5259317747189242,
      "MoBERT-min(F/N)": 0.5259317747189242,
      "MoBERT-max(F/N)": 0.5649554834090098,
      "MotionCritic": -10.688274383544922,
      "VeMo (human-opt view)": 0.34854771784232363,
      "VeMo (max entropy view)": 0.34854771784232363,
      "VeMo (min entropy view)": 0.28183118741058655,
      "VeMo (random view)": 0.34854771784232363,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves something out of the way in an effort to move forward."
  },
  "003596": {
    "text": "a person squats down to the ground, picks up a box, then stands back up, and places the box on a higher surface.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8644313918905823,
      "Minus Multimodal Distance": -7.397549152374268,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7576685170060955e-05,
      "MoBERT-F": 0.3757599842356606,
      "MoBERT-N": 0.4616455426371042,
      "MoBERT-min(F/N)": 0.3757599842356606,
      "MoBERT-max(F/N)": 0.4616455426371042,
      "MotionCritic": -8.736519813537598,
      "VeMo (human-opt view)": 5.26023908549579e-06,
      "VeMo (max entropy view)": 5.970794852113363e-06,
      "VeMo (min entropy view)": 5.26023908549579e-06,
      "VeMo (random view)": 5.26023908549579e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats down to the ground, picks up a box, then stands back up and places the box on a higher surface."
  },
  "003603": {
    "text": "person kneeling down qith stool",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2366874149837124,
      "Minus Multimodal Distance": -14.105020523071289,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.994523573666811e-05,
      "MoBERT-F": 0.46226314475624725,
      "MoBERT-N": 0.5492472742774721,
      "MoBERT-min(F/N)": 0.46226314475624725,
      "MoBERT-max(F/N)": 0.5492472742774721,
      "MotionCritic": -5.689792156219482,
      "VeMo (human-opt view)": 0.0007118484330979443,
      "VeMo (max entropy view)": 0.0014101272777218203,
      "VeMo (min entropy view)": 0.0007118484330979443,
      "VeMo (random view)": 0.0014101272777218203,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "person kneeling down with stool"
  },
  "003613": {
    "text": "a man picks something up with his left hand and mixes it and then steps back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6820444293384407,
      "Minus Multimodal Distance": -6.2697930335998535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001578246447024867,
      "MoBERT-F": 0.48517479748047343,
      "MoBERT-N": 0.5390587241215822,
      "MoBERT-min(F/N)": 0.48517479748047343,
      "MoBERT-max(F/N)": 0.5390587241215822,
      "MotionCritic": -9.963361740112305,
      "VeMo (human-opt view)": 0.3474178403755869,
      "VeMo (max entropy view)": 0.3474178403755869,
      "VeMo (min entropy view)": 0.6789473684210526,
      "VeMo (random view)": 0.3474178403755869,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man picks something up with his left hand, mixes it, and then steps back."
  },
  "003645": {
    "text": "a person pauses briefly then casually walks downward in a straight line, and stands in a relaxed pose.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7872072925860578,
      "Minus Multimodal Distance": -2.4227609634399414,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2727359464624897e-05,
      "MoBERT-F": 0.3989016042205391,
      "MoBERT-N": 0.44983123862308894,
      "MoBERT-min(F/N)": 0.3989016042205391,
      "MoBERT-max(F/N)": 0.44983123862308894,
      "MotionCritic": -7.3882927894592285,
      "VeMo (human-opt view)": 0.9580838323353293,
      "VeMo (max entropy view)": 0.9435215946843853,
      "VeMo (min entropy view)": 0.9580838323353293,
      "VeMo (random view)": 0.9580838323353293,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person pauses briefly, then casually walks downward in a straight line and stands in a relaxed pose."
  },
  "003664": {
    "text": "a person takes off their t-shirt with one arm.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5552285556633566,
      "Minus Multimodal Distance": -7.276254653930664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6370362320449203e-05,
      "MoBERT-F": 0.4340221115077146,
      "MoBERT-N": 0.5061949666100061,
      "MoBERT-min(F/N)": 0.4340221115077146,
      "MoBERT-max(F/N)": 0.5061949666100061,
      "MotionCritic": -10.636138916015625,
      "VeMo (human-opt view)": 0.07571288102261553,
      "VeMo (max entropy view)": 0.09536784741144415,
      "VeMo (min entropy view)": 0.07571288102261553,
      "VeMo (random view)": 0.07571288102261553,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes off their T - shirt with one arm."
  },
  "003677": {
    "text": "person stretches arms out and makes arm circles.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6008337006652332,
      "Minus Multimodal Distance": -8.221515655517578,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.536720659118146e-05,
      "MoBERT-F": 0.5350691190662484,
      "MoBERT-N": 0.5391558965297107,
      "MoBERT-min(F/N)": 0.5350691190662484,
      "MoBERT-max(F/N)": 0.5391558965297107,
      "MotionCritic": -11.205976486206055,
      "VeMo (human-opt view)": 0.955421936554012,
      "VeMo (max entropy view)": 0.955421936554012,
      "VeMo (min entropy view)": 0.9849942285494421,
      "VeMo (random view)": 0.955421936554012,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stretches their arms out and makes arm circles."
  },
  "003685": {
    "text": "ski mountain lodge  go down the mountain slowly",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4673621651478996,
      "Minus Multimodal Distance": -8.903844833374023,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9382708817138337e-05,
      "MoBERT-F": 0.3271031051648102,
      "MoBERT-N": 0.44299461891903125,
      "MoBERT-min(F/N)": 0.3271031051648102,
      "MoBERT-max(F/N)": 0.44299461891903125,
      "MotionCritic": -6.026566505432129,
      "VeMo (human-opt view)": 0.00033577495616271406,
      "VeMo (max entropy view)": 0.0003361157437042524,
      "VeMo (min entropy view)": 0.00033577495616271406,
      "VeMo (random view)": 0.0003361157437042524,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person goes down the mountain slowly from the ski mountain lodge."
  },
  "003696": {
    "text": "a person walks forward quickly and leaves a trail behind them as if they are dripping paint",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4143366280589367,
      "Minus Multimodal Distance": -6.340360164642334,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.016444791108369827,
      "MoBERT-F": 0.6186687587031967,
      "MoBERT-N": 0.5500545160470877,
      "MoBERT-min(F/N)": 0.5500545160470877,
      "MoBERT-max(F/N)": 0.6186687587031967,
      "MotionCritic": -4.962840557098389,
      "VeMo (human-opt view)": 0.17394757744241462,
      "VeMo (max entropy view)": 0.17394757744241462,
      "VeMo (min entropy view)": 0.06363214119832791,
      "VeMo (random view)": 0.06363214119832791,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward quickly and leaves a trail behind them, as if they are dripping paint."
  },
  "003703": {
    "text": "a person runs forward and jumps over something, then turns around and jumps back over it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.287265681128623,
      "Minus Multimodal Distance": -7.482006549835205,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3897560822661035e-05,
      "MoBERT-F": 0.36791501750492883,
      "MoBERT-N": 0.6208424135683265,
      "MoBERT-min(F/N)": 0.36791501750492883,
      "MoBERT-max(F/N)": 0.6208424135683265,
      "MotionCritic": -4.775696277618408,
      "VeMo (human-opt view)": 3.319345682013108e-05,
      "VeMo (max entropy view)": 0.060109289617486336,
      "VeMo (min entropy view)": 3.319345682013108e-05,
      "VeMo (random view)": 0.060109289617486336,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs forward and jumps over something, then turns around and jumps back over it."
  },
  "003712": {
    "text": "the person crawls forward at a slow speed",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.3684768791165354,
      "Minus Multimodal Distance": -13.434418678283691,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7588495868258178e-05,
      "MoBERT-F": 0.5142874275328351,
      "MoBERT-N": 0.5377934890484881,
      "MoBERT-min(F/N)": 0.5142874275328351,
      "MoBERT-max(F/N)": 0.5377934890484881,
      "MotionCritic": -6.683158874511719,
      "VeMo (human-opt view)": 0.9840681733975547,
      "VeMo (max entropy view)": 0.9580386610089581,
      "VeMo (min entropy view)": 0.9840681733975547,
      "VeMo (random view)": 0.9840681733975547,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person crawls forward at a slow speed."
  },
  "003721": {
    "text": "the person is walking forward in an odd way.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.151991973161753,
      "Minus Multimodal Distance": -6.532837390899658,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.005222359672188759,
      "MoBERT-F": 0.663229937282825,
      "MoBERT-N": 0.5538224590546833,
      "MoBERT-min(F/N)": 0.5538224590546833,
      "MoBERT-max(F/N)": 0.663229937282825,
      "MotionCritic": -2.0841360092163086,
      "VeMo (human-opt view)": 0.9198113207547169,
      "VeMo (max entropy view)": 0.9198113207547169,
      "VeMo (min entropy view)": 0.9242424242424242,
      "VeMo (random view)": 0.9242424242424242,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking forward in an odd way."
  },
  "003723": {
    "text": "person prepares food on a kitchen worktop.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7687529225719912,
      "Minus Multimodal Distance": -9.206209182739258,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4388462406932376e-05,
      "MoBERT-F": 0.5168610369745263,
      "MoBERT-N": 0.5777900706707397,
      "MoBERT-min(F/N)": 0.5168610369745263,
      "MoBERT-max(F/N)": 0.5777900706707397,
      "MotionCritic": -10.457954406738281,
      "VeMo (human-opt view)": 3.192289099965007e-06,
      "VeMo (max entropy view)": 3.7263802022188208e-06,
      "VeMo (min entropy view)": 3.192289099965007e-06,
      "VeMo (random view)": 3.7263802022188208e-06,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person prepares food on a kitchen worktop."
  },
  "003729": {
    "text": "a person in boxing class",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0231503372905209,
      "Minus Multimodal Distance": -10.360091209411621,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.381615740887355e-05,
      "MoBERT-F": 0.3846321996327792,
      "MoBERT-N": 0.4042193220532367,
      "MoBERT-min(F/N)": 0.3846321996327792,
      "MoBERT-max(F/N)": 0.4042193220532367,
      "MotionCritic": -1.4693032503128052,
      "VeMo (human-opt view)": 0.915068493150685,
      "VeMo (max entropy view)": 0.899188876013905,
      "VeMo (min entropy view)": 0.915068493150685,
      "VeMo (random view)": 0.915068493150685,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person in a boxing class"
  },
  "003751": {
    "text": "a person placing something from left to right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.36729995414946615,
      "Minus Multimodal Distance": -8.789094924926758,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3749113097437657e-05,
      "MoBERT-F": 0.409184362339055,
      "MoBERT-N": 0.4244142156106961,
      "MoBERT-min(F/N)": 0.409184362339055,
      "MoBERT-max(F/N)": 0.4244142156106961,
      "MotionCritic": -2.186467409133911,
      "VeMo (human-opt view)": 0.03319799379030332,
      "VeMo (max entropy view)": 0.03319799379030332,
      "VeMo (min entropy view)": 0.01587939698492462,
      "VeMo (random view)": 0.01587939698492462,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person places something from left to right."
  },
  "003761": {
    "text": "a person sitting and readjusting position to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9954178314133658,
      "Minus Multimodal Distance": -8.609044075012207,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3792321371729486e-05,
      "MoBERT-F": 0.4138702055970901,
      "MoBERT-N": 0.38335236794469174,
      "MoBERT-min(F/N)": 0.38335236794469174,
      "MoBERT-max(F/N)": 0.4138702055970901,
      "MotionCritic": 2.4517691135406494,
      "VeMo (human-opt view)": 0.6501128668171557,
      "VeMo (max entropy view)": 0.6501128668171557,
      "VeMo (min entropy view)": 0.6514084507042254,
      "VeMo (random view)": 0.6514084507042254,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting and readjusting their position to the left."
  },
  "003763": {
    "text": "a person side steps to the right and then stands up straight.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8131620071635668,
      "Minus Multimodal Distance": -10.944477081298828,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6742056434159167e-05,
      "MoBERT-F": 0.4450476176033501,
      "MoBERT-N": 0.6172144460542454,
      "MoBERT-min(F/N)": 0.4450476176033501,
      "MoBERT-max(F/N)": 0.6172144460542454,
      "MotionCritic": -4.673019886016846,
      "VeMo (human-opt view)": 0.957933868127568,
      "VeMo (max entropy view)": 0.8934269304403318,
      "VeMo (min entropy view)": 0.957933868127568,
      "VeMo (random view)": 0.8934269304403318,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person side - steps to the right and then stands up straight."
  },
  "003771": {
    "text": "a person acting like a tiger",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9076283251474309,
      "Minus Multimodal Distance": -6.247795104980469,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0003685954143293202,
      "MoBERT-F": 0.5940088127550206,
      "MoBERT-N": 0.5213016086884792,
      "MoBERT-min(F/N)": 0.5213016086884792,
      "MoBERT-max(F/N)": 0.5940088127550206,
      "MotionCritic": -5.249986171722412,
      "VeMo (human-opt view)": 0.009114745910850293,
      "VeMo (max entropy view)": 0.11900532859680284,
      "VeMo (min entropy view)": 0.009114745910850293,
      "VeMo (random view)": 0.009114745910850293,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person acting like a tiger."
  },
  "003780": {
    "text": "a person clasps its arms together then goes back to standing position with arms hanging.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.38173611425282256,
      "Minus Multimodal Distance": -6.160795211791992,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.851962123415433e-05,
      "MoBERT-F": 0.45240559417309073,
      "MoBERT-N": 0.5315448669598773,
      "MoBERT-min(F/N)": 0.45240559417309073,
      "MoBERT-max(F/N)": 0.5315448669598773,
      "MotionCritic": -0.7956233620643616,
      "VeMo (human-opt view)": 0.998295153926045,
      "VeMo (max entropy view)": 0.6653306613226453,
      "VeMo (min entropy view)": 0.998295153926045,
      "VeMo (random view)": 0.6653306613226453,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person clasps their arms together, then goes back to the standing position with their arms hanging."
  },
  "003782": {
    "text": "person walked forward picked up an item and walked to the back and set the item down",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.736315938768643,
      "Minus Multimodal Distance": -10.089097023010254,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8054207563400269,
      "MoBERT-F": 0.5618052147689662,
      "MoBERT-N": 0.5284127575130577,
      "MoBERT-min(F/N)": 0.5284127575130577,
      "MoBERT-max(F/N)": 0.5618052147689662,
      "MotionCritic": -11.49317455291748,
      "VeMo (human-opt view)": 3.024896504370708e-05,
      "VeMo (max entropy view)": 0.00011242054276638048,
      "VeMo (min entropy view)": 3.024896504370708e-05,
      "VeMo (random view)": 0.00011242054276638048,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walked forward, picked up an item, walked to the back, and set the item down."
  },
  "003783": {
    "text": "a person lunges over and gets back up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.401587000249104,
      "Minus Multimodal Distance": -9.588305473327637,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0043329293839633465,
      "MoBERT-F": 0.4287663413062719,
      "MoBERT-N": 0.47933573878205027,
      "MoBERT-min(F/N)": 0.4287663413062719,
      "MoBERT-max(F/N)": 0.47933573878205027,
      "MotionCritic": -3.960357189178467,
      "VeMo (human-opt view)": 0.997363107257394,
      "VeMo (max entropy view)": 0.9966075301707358,
      "VeMo (min entropy view)": 0.997363107257394,
      "VeMo (random view)": 0.9966075301707358,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lunges forward and then gets back up."
  },
  "003784": {
    "text": "a man walks in clockwise direction.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1844907725199125,
      "Minus Multimodal Distance": -2.8733742237091064,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2707166863256134e-05,
      "MoBERT-F": 0.3864311325843244,
      "MoBERT-N": 0.49095764313761625,
      "MoBERT-min(F/N)": 0.3864311325843244,
      "MoBERT-max(F/N)": 0.49095764313761625,
      "MotionCritic": -2.3358564376831055,
      "VeMo (human-opt view)": 0.8670309653916212,
      "VeMo (max entropy view)": 0.8593091828138163,
      "VeMo (min entropy view)": 0.8670309653916212,
      "VeMo (random view)": 0.8593091828138163,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks in a clockwise direction."
  },
  "003785": {
    "text": "a person is scrubbing a window",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9682791086154641,
      "Minus Multimodal Distance": -10.016695022583008,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7611265977611765e-05,
      "MoBERT-F": 0.31805072628201597,
      "MoBERT-N": 0.42657840289518834,
      "MoBERT-min(F/N)": 0.31805072628201597,
      "MoBERT-max(F/N)": 0.42657840289518834,
      "MotionCritic": -4.58033561706543,
      "VeMo (human-opt view)": 6.753844626053381e-06,
      "VeMo (max entropy view)": 6.753844626053381e-06,
      "VeMo (min entropy view)": 5.933938312472715e-06,
      "VeMo (random view)": 5.933938312472715e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is scrubbing a window."
  },
  "003790": {
    "text": "a person with their shoulders held high",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5036072959317457,
      "Minus Multimodal Distance": -6.740557670593262,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00045496970415115356,
      "MoBERT-F": 0.6033426713598367,
      "MoBERT-N": 0.6392290748874347,
      "MoBERT-min(F/N)": 0.6033426713598367,
      "MoBERT-max(F/N)": 0.6392290748874347,
      "MotionCritic": -2.053062915802002,
      "VeMo (human-opt view)": 0.452755905511811,
      "VeMo (max entropy view)": 0.452755905511811,
      "VeMo (min entropy view)": 0.6656101426307448,
      "VeMo (random view)": 0.452755905511811,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person with their shoulders held high."
  },
  "003800": {
    "text": "a person does the cat walk",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7775692560488106,
      "Minus Multimodal Distance": -9.423139572143555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.002152286237105727,
      "MoBERT-F": 0.681952620245185,
      "MoBERT-N": 0.7161972771097701,
      "MoBERT-min(F/N)": 0.681952620245185,
      "MoBERT-max(F/N)": 0.7161972771097701,
      "MotionCritic": -11.040447235107422,
      "VeMo (human-opt view)": 0.5310734463276836,
      "VeMo (max entropy view)": 0.5310734463276836,
      "VeMo (min entropy view)": 0.5319148936170213,
      "VeMo (random view)": 0.5310734463276836,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does the catwalk."
  },
  "003807": {
    "text": "the person walks in a straight line and places their right hand to support their weight against something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3809342739697255,
      "Minus Multimodal Distance": -9.270475387573242,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.0065759094431996e-05,
      "MoBERT-F": 0.37157164013811084,
      "MoBERT-N": 0.28654027740286814,
      "MoBERT-min(F/N)": 0.28654027740286814,
      "MoBERT-max(F/N)": 0.37157164013811084,
      "MotionCritic": -14.77437973022461,
      "VeMo (human-opt view)": 0.2689075630252101,
      "VeMo (max entropy view)": 0.362962962962963,
      "VeMo (min entropy view)": 0.2689075630252101,
      "VeMo (random view)": 0.362962962962963,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person walks in a straight line and places their right hand against something to support their weight."
  },
  "003812": {
    "text": "a person whos put both hands together and is begging",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9556511842786788,
      "Minus Multimodal Distance": -8.748973846435547,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.03208177909255028,
      "MoBERT-F": 0.6111272997271437,
      "MoBERT-N": 0.5617386022531553,
      "MoBERT-min(F/N)": 0.5617386022531553,
      "MoBERT-max(F/N)": 0.6111272997271437,
      "MotionCritic": -3.7638792991638184,
      "VeMo (human-opt view)": 0.8739859383450513,
      "VeMo (max entropy view)": 0.5160142348754448,
      "VeMo (min entropy view)": 0.8739859383450513,
      "VeMo (random view)": 0.5160142348754448,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who puts both hands together and is begging."
  },
  "003823": {
    "text": "person scatches head and armpit like a monkey then pretends to hold a baby",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9989365355189431,
      "Minus Multimodal Distance": -10.68206787109375,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0004576152714435011,
      "MoBERT-F": 0.5948326420008057,
      "MoBERT-N": 0.5964650106689809,
      "MoBERT-min(F/N)": 0.5948326420008057,
      "MoBERT-max(F/N)": 0.5964650106689809,
      "MotionCritic": -6.913072109222412,
      "VeMo (human-opt view)": 0.001501556491485076,
      "VeMo (max entropy view)": 0.009076348104644954,
      "VeMo (min entropy view)": 0.001501556491485076,
      "VeMo (random view)": 0.009076348104644954,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person scratches their head and armpit like a monkey, then pretends to hold a baby."
  },
  "003831": {
    "text": "a person swinging golf club or swinging and hitting ball with a bat.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7637457144259447,
      "Minus Multimodal Distance": -8.94901180267334,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.446471939445473e-05,
      "MoBERT-F": 0.5573904893479855,
      "MoBERT-N": 0.6014762539224066,
      "MoBERT-min(F/N)": 0.5573904893479855,
      "MoBERT-max(F/N)": 0.6014762539224066,
      "MotionCritic": -6.086828708648682,
      "VeMo (human-opt view)": 1.109075608086023e-05,
      "VeMo (max entropy view)": 4.684496484887681e-05,
      "VeMo (min entropy view)": 1.109075608086023e-05,
      "VeMo (random view)": 1.109075608086023e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person swinging a golf club or swinging and hitting a ball with a bat."
  },
  "003858": {
    "text": "a person raises both hands and waves them in various ways.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7057486564943553,
      "Minus Multimodal Distance": -5.709231853485107,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9309612727956846e-05,
      "MoBERT-F": 0.33294451829620797,
      "MoBERT-N": 0.3967191298205308,
      "MoBERT-min(F/N)": 0.33294451829620797,
      "MoBERT-max(F/N)": 0.3967191298205308,
      "MotionCritic": -4.673415660858154,
      "VeMo (human-opt view)": 0.9986794976912786,
      "VeMo (max entropy view)": 0.9959422714913198,
      "VeMo (min entropy view)": 0.9986794976912786,
      "VeMo (random view)": 0.9959422714913198,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises both hands and waves them in various ways."
  },
  "003859": {
    "text": "person is walking forwards quite fast, then squats down to pick something up to then turn around and walk fast again. appears to be in a rush and moving an item",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7134459067347394,
      "Minus Multimodal Distance": -8.762518882751465,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00029282542527653277,
      "MoBERT-F": 0.5559795594773054,
      "MoBERT-N": 0.5515312402435382,
      "MoBERT-min(F/N)": 0.5515312402435382,
      "MoBERT-max(F/N)": 0.5559795594773054,
      "MotionCritic": -7.042967319488525,
      "VeMo (human-opt view)": 0.18219749652294853,
      "VeMo (max entropy view)": 0.23395445134575568,
      "VeMo (min entropy view)": 0.18219749652294853,
      "VeMo (random view)": 0.23395445134575568,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking forward quite fast, then squats down to pick something up. After that, they turn around and walk fast again. They appear to be in a rush and moving an item."
  },
  "003868": {
    "text": "a person taking something from a shelf.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5157485754606569,
      "Minus Multimodal Distance": -7.840290069580078,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.489229679107666,
      "MoBERT-F": 0.6721593913177146,
      "MoBERT-N": 0.609054440911098,
      "MoBERT-min(F/N)": 0.609054440911098,
      "MoBERT-max(F/N)": 0.6721593913177146,
      "MotionCritic": -7.062058448791504,
      "VeMo (human-opt view)": 6.130725876581404e-06,
      "VeMo (max entropy view)": 0.00035700383022753445,
      "VeMo (min entropy view)": 6.130725876581404e-06,
      "VeMo (random view)": 6.130725876581404e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is taking something from a shelf."
  },
  "003900": {
    "text": "a person bends down as if pushed.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5307681328990034,
      "Minus Multimodal Distance": -8.567475318908691,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4322125682374462e-05,
      "MoBERT-F": 0.3136356237121187,
      "MoBERT-N": 0.4622641072065399,
      "MoBERT-min(F/N)": 0.3136356237121187,
      "MoBERT-max(F/N)": 0.4622641072065399,
      "MotionCritic": -0.1546413004398346,
      "VeMo (human-opt view)": 0.053323593864134405,
      "VeMo (max entropy view)": 0.053323593864134405,
      "VeMo (min entropy view)": 0.0011705033164260631,
      "VeMo (random view)": 0.0011705033164260631,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends down as if pushed."
  },
  "003902": {
    "text": "the person checks the watch on his hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6020538075168644,
      "Minus Multimodal Distance": -10.61174488067627,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.831177986692637e-05,
      "MoBERT-F": 0.26939517424331827,
      "MoBERT-N": 0.3915913808427138,
      "MoBERT-min(F/N)": 0.26939517424331827,
      "MoBERT-max(F/N)": 0.3915913808427138,
      "MotionCritic": -6.101255893707275,
      "VeMo (human-opt view)": 0.000971277924240322,
      "VeMo (max entropy view)": 0.05361466620546524,
      "VeMo (min entropy view)": 0.000971277924240322,
      "VeMo (random view)": 0.05361466620546524,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person checks the watch on his hand."
  },
  "003910": {
    "text": "a man walks forward then right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0069729481410734,
      "Minus Multimodal Distance": -5.360311508178711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.50190933002159e-05,
      "MoBERT-F": 0.43715277227782556,
      "MoBERT-N": 0.5952055288100557,
      "MoBERT-min(F/N)": 0.43715277227782556,
      "MoBERT-max(F/N)": 0.5952055288100557,
      "MotionCritic": -5.834993839263916,
      "VeMo (human-opt view)": 0.9526488513830286,
      "VeMo (max entropy view)": 0.9526488513830286,
      "VeMo (min entropy view)": 0.9740413756753196,
      "VeMo (random view)": 0.9526488513830286,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man walks forward and then turns right."
  },
  "003934": {
    "text": "a person stands, waving back and forth with one arm.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7364146201902897,
      "Minus Multimodal Distance": -7.957580089569092,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0618630262324587e-05,
      "MoBERT-F": 0.5101377438644019,
      "MoBERT-N": 0.5241320571361656,
      "MoBERT-min(F/N)": 0.5101377438644019,
      "MoBERT-max(F/N)": 0.5241320571361656,
      "MotionCritic": -5.65791654586792,
      "VeMo (human-opt view)": 0.9937030966962822,
      "VeMo (max entropy view)": 0.9937030966962822,
      "VeMo (min entropy view)": 0.9947933863307342,
      "VeMo (random view)": 0.9937030966962822,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands, waving back and forth with one arm."
  },
  "003942": {
    "text": "walking around in a circle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8560867317548884,
      "Minus Multimodal Distance": -11.081344604492188,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.45209255814552307,
      "MoBERT-F": 0.6730274650190484,
      "MoBERT-N": 0.6020280369309507,
      "MoBERT-min(F/N)": 0.6020280369309507,
      "MoBERT-max(F/N)": 0.6730274650190484,
      "MotionCritic": -13.795668601989746,
      "VeMo (human-opt view)": 0.8870967741935484,
      "VeMo (max entropy view)": 0.8870967741935484,
      "VeMo (min entropy view)": 0.909952606635071,
      "VeMo (random view)": 0.909952606635071,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking around in a circle."
  },
  "003954": {
    "text": "a person standing in one position, gently swinging both arms in back-and-forth motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4563389885197362,
      "Minus Multimodal Distance": -10.276839256286621,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5188135623466223e-05,
      "MoBERT-F": 0.4266214571818423,
      "MoBERT-N": 0.46937791473370666,
      "MoBERT-min(F/N)": 0.4266214571818423,
      "MoBERT-max(F/N)": 0.46937791473370666,
      "MotionCritic": -4.9065842628479,
      "VeMo (human-opt view)": 0.029372976861287616,
      "VeMo (max entropy view)": 0.029372976861287616,
      "VeMo (min entropy view)": 0.02595797280593325,
      "VeMo (random view)": 0.02595797280593325,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands in one position, gently swinging both arms in a back-and-forth motion."
  },
  "003964": {
    "text": "a person opens and drinks from a container.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7862939301764694,
      "Minus Multimodal Distance": -7.462489128112793,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.0684866942465305e-05,
      "MoBERT-F": 0.31179257907528884,
      "MoBERT-N": 0.4189980409620807,
      "MoBERT-min(F/N)": 0.31179257907528884,
      "MoBERT-max(F/N)": 0.4189980409620807,
      "MotionCritic": -7.034124851226807,
      "VeMo (human-opt view)": 9.255245410336308e-06,
      "VeMo (max entropy view)": 1.3876459813247176e-05,
      "VeMo (min entropy view)": 9.255245410336308e-06,
      "VeMo (random view)": 1.3876459813247176e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person opens a container and drinks from it."
  },
  "003997": {
    "text": "a person bounces up and down on their toes with their arms bouncing along loosely by their sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9930178750668729,
      "Minus Multimodal Distance": -9.662142753601074,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3583919107913971,
      "MoBERT-F": 0.7470605750374919,
      "MoBERT-N": 0.7169980361703334,
      "MoBERT-min(F/N)": 0.7169980361703334,
      "MoBERT-max(F/N)": 0.7470605750374919,
      "MotionCritic": -7.508553981781006,
      "VeMo (human-opt view)": 0.6371308016877637,
      "VeMo (max entropy view)": 0.6371308016877637,
      "VeMo (min entropy view)": 0.18243243243243243,
      "VeMo (random view)": 0.6371308016877637,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person bounces up and down on their toes, with their arms bouncing loosely by their sides."
  },
  "004008": {
    "text": "a person walks quickly in a diagonal direction for 8 steps and then turns to walk in the direction they came from.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8786380890895787,
      "Minus Multimodal Distance": -7.11670446395874,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.221184149675537e-05,
      "MoBERT-F": 0.3553581529453935,
      "MoBERT-N": 0.5436438000754351,
      "MoBERT-min(F/N)": 0.3553581529453935,
      "MoBERT-max(F/N)": 0.5436438000754351,
      "MotionCritic": 1.0394357442855835,
      "VeMo (human-opt view)": 0.5161290322580645,
      "VeMo (max entropy view)": 0.484251968503937,
      "VeMo (min entropy view)": 0.5161290322580645,
      "VeMo (random view)": 0.484251968503937,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks quickly in a diagonal direction for 8 steps and then turns to walk in the direction they came from."
  },
  "004012": {
    "text": "the person raises their left foot up to their kinee and then kicks their foot out,  then returns their foot to their knee.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0472960112892833,
      "Minus Multimodal Distance": -9.85407829284668,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5057968741748482e-05,
      "MoBERT-F": 0.36339481615126334,
      "MoBERT-N": 0.6023756220982351,
      "MoBERT-min(F/N)": 0.36339481615126334,
      "MoBERT-max(F/N)": 0.6023756220982351,
      "MotionCritic": -11.929239273071289,
      "VeMo (human-opt view)": 0.08484848484848485,
      "VeMo (max entropy view)": 0.2568527918781726,
      "VeMo (min entropy view)": 0.08484848484848485,
      "VeMo (random view)": 0.08484848484848485,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person raises their left foot up to their knee and then kicks their foot out, then returns their foot to their knee."
  },
  "004015": {
    "text": "person walks forward with left hand extended to side, trying to feel something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2594249803033224,
      "Minus Multimodal Distance": -4.609428882598877,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.0111708838376217e-05,
      "MoBERT-F": 0.4273331599386787,
      "MoBERT-N": 0.4762365882289566,
      "MoBERT-min(F/N)": 0.4273331599386787,
      "MoBERT-max(F/N)": 0.4762365882289566,
      "MotionCritic": -9.449531555175781,
      "VeMo (human-opt view)": 0.25740318906605925,
      "VeMo (max entropy view)": 0.5935251798561151,
      "VeMo (min entropy view)": 0.25740318906605925,
      "VeMo (random view)": 0.25740318906605925,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward with their left hand extended to the side, trying to feel something."
  },
  "004050": {
    "text": "the person to march forward and turned left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4590118520432782,
      "Minus Multimodal Distance": -5.225456237792969,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.014199652709066868,
      "MoBERT-F": 0.7660143627951448,
      "MoBERT-N": 0.8731381874733549,
      "MoBERT-min(F/N)": 0.7660143627951448,
      "MoBERT-max(F/N)": 0.8731381874733549,
      "MotionCritic": -16.657550811767578,
      "VeMo (human-opt view)": 0.7435456110154905,
      "VeMo (max entropy view)": 0.7435456110154905,
      "VeMo (min entropy view)": 0.8441145281018028,
      "VeMo (random view)": 0.7435456110154905,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person marched forward and turned left."
  },
  "004063": {
    "text": "a person stands and trying to hold balance.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7698043422673132,
      "Minus Multimodal Distance": -6.482120513916016,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.25541753025027e-05,
      "MoBERT-F": 0.40903109997318393,
      "MoBERT-N": 0.4994654051418354,
      "MoBERT-min(F/N)": 0.40903109997318393,
      "MoBERT-max(F/N)": 0.4994654051418354,
      "MotionCritic": -4.19991397857666,
      "VeMo (human-opt view)": 0.9908416921064108,
      "VeMo (max entropy view)": 0.9399656946826758,
      "VeMo (min entropy view)": 0.9908416921064108,
      "VeMo (random view)": 0.9908416921064108,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands and tries to hold their balance."
  },
  "004067": {
    "text": "a person reaches forward with the right hand and then lets their arm drop to their side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4368256711212728,
      "Minus Multimodal Distance": -4.6210737228393555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.293444049428217e-05,
      "MoBERT-F": 0.2848621766335737,
      "MoBERT-N": 0.45452401943507675,
      "MoBERT-min(F/N)": 0.2848621766335737,
      "MoBERT-max(F/N)": 0.45452401943507675,
      "MotionCritic": -6.827223300933838,
      "VeMo (human-opt view)": 0.9976712133453733,
      "VeMo (max entropy view)": 0.8739859383450513,
      "VeMo (min entropy view)": 0.9976712133453733,
      "VeMo (random view)": 0.9976712133453733,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person reaches forward with their right hand and then lets their arm drop to their side."
  },
  "004068": {
    "text": "a person takes a step backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.49532174800800627,
      "Minus Multimodal Distance": -5.516451835632324,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2767229893361218e-05,
      "MoBERT-F": 0.40852539963467477,
      "MoBERT-N": 0.5306901117716163,
      "MoBERT-min(F/N)": 0.40852539963467477,
      "MoBERT-max(F/N)": 0.5306901117716163,
      "MotionCritic": -7.6997599601745605,
      "VeMo (human-opt view)": 0.8263988522238164,
      "VeMo (max entropy view)": 0.8263988522238164,
      "VeMo (min entropy view)": 0.8518518518518519,
      "VeMo (random view)": 0.8518518518518519,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes a step backwards."
  },
  "004110": {
    "text": "a person jumping over a puddle",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0981902775975252,
      "Minus Multimodal Distance": -3.867814540863037,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.6125305891036987,
      "MoBERT-F": 0.7163347564970964,
      "MoBERT-N": 0.6445647949781783,
      "MoBERT-min(F/N)": 0.6445647949781783,
      "MoBERT-max(F/N)": 0.7163347564970964,
      "MotionCritic": -6.862137317657471,
      "VeMo (human-opt view)": 0.010369292341277061,
      "VeMo (max entropy view)": 0.010369292341277061,
      "VeMo (min entropy view)": 0.0015972405953885802,
      "VeMo (random view)": 0.0015972405953885802,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is jumping over a puddle."
  },
  "004134": {
    "text": "the man stretches his arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0720109411693761,
      "Minus Multimodal Distance": -2.702726364135742,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.222422259976156e-05,
      "MoBERT-F": 0.5419054529409595,
      "MoBERT-N": 0.6217905876606158,
      "MoBERT-min(F/N)": 0.5419054529409595,
      "MoBERT-max(F/N)": 0.6217905876606158,
      "MotionCritic": -5.8987507820129395,
      "VeMo (human-opt view)": 0.9668828951860703,
      "VeMo (max entropy view)": 0.9668828951860703,
      "VeMo (min entropy view)": 0.9795918367346939,
      "VeMo (random view)": 0.9668828951860703,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man stretches his arms."
  },
  "004157": {
    "text": "a man kicks with his right leg and then kicks with his left leg.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4600305511782743,
      "Minus Multimodal Distance": -1.666666030883789,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.690913424885366e-05,
      "MoBERT-F": 0.4178757503124576,
      "MoBERT-N": 0.5494233728188864,
      "MoBERT-min(F/N)": 0.4178757503124576,
      "MoBERT-max(F/N)": 0.5494233728188864,
      "MotionCritic": -8.215641975402832,
      "VeMo (human-opt view)": 0.5304659498207885,
      "VeMo (max entropy view)": 0.5304659498207885,
      "VeMo (min entropy view)": 0.5928753180661578,
      "VeMo (random view)": 0.5928753180661578,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man kicks with his right leg and then with his left leg."
  },
  "004158": {
    "text": "a person walks forward using their left hand to steady themselves on an object.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9911705724515627,
      "Minus Multimodal Distance": -8.72354793548584,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.29776739818044e-05,
      "MoBERT-F": 0.35358753086124034,
      "MoBERT-N": 0.45979952071391034,
      "MoBERT-min(F/N)": 0.35358753086124034,
      "MoBERT-max(F/N)": 0.45979952071391034,
      "MotionCritic": -6.781008243560791,
      "VeMo (human-opt view)": 0.17277486910994763,
      "VeMo (max entropy view)": 0.2451708766716196,
      "VeMo (min entropy view)": 0.17277486910994763,
      "VeMo (random view)": 0.17277486910994763,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, using their left hand to steady themselves on an object."
  },
  "004163": {
    "text": "he is stretching his arm then moving it",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2875233021488304,
      "Minus Multimodal Distance": -5.774890422821045,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.166697181062773e-05,
      "MoBERT-F": 0.3549120762826281,
      "MoBERT-N": 0.439891514763468,
      "MoBERT-min(F/N)": 0.3549120762826281,
      "MoBERT-max(F/N)": 0.439891514763468,
      "MotionCritic": -8.223421096801758,
      "VeMo (human-opt view)": 0.9668898136421205,
      "VeMo (max entropy view)": 0.9626461362988309,
      "VeMo (min entropy view)": 0.9668898136421205,
      "VeMo (random view)": 0.9626461362988309,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He is stretching his arm and then moving it."
  },
  "004176": {
    "text": "a person walks forward and moves something with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8188499938709873,
      "Minus Multimodal Distance": -11.562278747558594,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2964992240304127e-05,
      "MoBERT-F": 0.43534568775098237,
      "MoBERT-N": 0.5195548186630177,
      "MoBERT-min(F/N)": 0.43534568775098237,
      "MoBERT-max(F/N)": 0.5195548186630177,
      "MotionCritic": -12.08506965637207,
      "VeMo (human-opt view)": 0.8356435643564356,
      "VeMo (max entropy view)": 0.8356435643564356,
      "VeMo (min entropy view)": 0.9152,
      "VeMo (random view)": 0.8356435643564356,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and moves something with his right hand."
  },
  "004183": {
    "text": "a person briskly walks foward swinging their left arm back and forth.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2090898733078377,
      "Minus Multimodal Distance": -7.592811584472656,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.4504696890944615e-05,
      "MoBERT-F": 0.5183836458475236,
      "MoBERT-N": 0.49542479621132396,
      "MoBERT-min(F/N)": 0.49542479621132396,
      "MoBERT-max(F/N)": 0.5183836458475236,
      "MotionCritic": -2.1713671684265137,
      "VeMo (human-opt view)": 0.5324675324675324,
      "VeMo (max entropy view)": 0.5324675324675324,
      "VeMo (min entropy view)": 0.5782747603833865,
      "VeMo (random view)": 0.5324675324675324,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person briskly walks forward, swinging their left arm back and forth."
  },
  "004192": {
    "text": "person moves forward to their left side and picks something up and does a full turn back",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8127182892049996,
      "Minus Multimodal Distance": -12.39480972290039,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.11736305803060532,
      "MoBERT-F": 0.7483762751968908,
      "MoBERT-N": 0.6711021364728349,
      "MoBERT-min(F/N)": 0.6711021364728349,
      "MoBERT-max(F/N)": 0.7483762751968908,
      "MotionCritic": -5.29746675491333,
      "VeMo (human-opt view)": 0.014980504822491279,
      "VeMo (max entropy view)": 0.041916167664670656,
      "VeMo (min entropy view)": 0.014980504822491279,
      "VeMo (random view)": 0.014980504822491279,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person moves forward to their left side, picks something up, and does a full turn backward."
  },
  "004206": {
    "text": "a person stretches their leg by lunging back and forth on it, stretches their arms one at a time by lifting them and bending at the waist in the opposite direction, then finally walks forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1005636973701716,
      "Minus Multimodal Distance": -6.155407905578613,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0012414266820997,
      "MoBERT-F": 0.47539863396540294,
      "MoBERT-N": 0.4442214091542157,
      "MoBERT-min(F/N)": 0.4442214091542157,
      "MoBERT-max(F/N)": 0.47539863396540294,
      "MotionCritic": -18.63027000427246,
      "VeMo (human-opt view)": 0.7770582793709528,
      "VeMo (max entropy view)": 0.7770582793709528,
      "VeMo (min entropy view)": 0.843846949327818,
      "VeMo (random view)": 0.7770582793709528,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stretches their leg by lunging back and forth. They stretch their arms one at a time by lifting them and bending at the waist in the opposite direction. Then, finally, they walk forward."
  },
  "004222": {
    "text": "a person walks in a clockwise circle and raises their hand to their face to yawn.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5343251504432904,
      "Minus Multimodal Distance": -12.543774604797363,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0010948735289275646,
      "MoBERT-F": 0.4698921403852336,
      "MoBERT-N": 0.5206095674407407,
      "MoBERT-min(F/N)": 0.4698921403852336,
      "MoBERT-max(F/N)": 0.5206095674407407,
      "MotionCritic": -8.296836853027344,
      "VeMo (human-opt view)": 0.7776628748707343,
      "VeMo (max entropy view)": 0.7776628748707343,
      "VeMo (min entropy view)": 0.8356909684439608,
      "VeMo (random view)": 0.8356909684439608,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in a clockwise circle and raises their hand to their face to yawn."
  },
  "004235": {
    "text": "a person steps forward and reaches out with their right hand to pick up an object that is at their waist level. they raise it to their mouth and take a sip, then put the object down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7353448759632505,
      "Minus Multimodal Distance": -7.00877571105957,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3942682673805393e-05,
      "MoBERT-F": 0.4066504940784929,
      "MoBERT-N": 0.5557324302639344,
      "MoBERT-min(F/N)": 0.4066504940784929,
      "MoBERT-max(F/N)": 0.5557324302639344,
      "MotionCritic": -8.951457977294922,
      "VeMo (human-opt view)": 0.0003790534049900044,
      "VeMo (max entropy view)": 0.0003790534049900044,
      "VeMo (min entropy view)": 0.00020273684656451824,
      "VeMo (random view)": 0.00020273684656451824,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps forward, reaches out with their right hand to pick up an object at waist level. They raise it to their mouth, take a sip, then put the object down."
  },
  "004270": {
    "text": "person is reaching down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6263887879718082,
      "Minus Multimodal Distance": -6.539270877838135,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00030387245351448655,
      "MoBERT-F": 0.4894769079707907,
      "MoBERT-N": 0.5341290853651145,
      "MoBERT-min(F/N)": 0.4894769079707907,
      "MoBERT-max(F/N)": 0.5341290853651145,
      "MotionCritic": -8.949045181274414,
      "VeMo (human-opt view)": 0.9467455621301775,
      "VeMo (max entropy view)": 0.9398847104035136,
      "VeMo (min entropy view)": 0.9467455621301775,
      "VeMo (random view)": 0.9467455621301775,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is reaching down."
  },
  "004284": {
    "text": "a person walks with side steps to the right and then walks forward, then turn around and walks back",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8357907262318327,
      "Minus Multimodal Distance": -4.899005889892578,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9747922420501709,
      "MoBERT-F": 0.6765279304912813,
      "MoBERT-N": 0.6997266479198858,
      "MoBERT-min(F/N)": 0.6765279304912813,
      "MoBERT-max(F/N)": 0.6997266479198858,
      "MotionCritic": -13.166473388671875,
      "VeMo (human-opt view)": 0.9100169779286927,
      "VeMo (max entropy view)": 0.8931245745405038,
      "VeMo (min entropy view)": 0.9100169779286927,
      "VeMo (random view)": 0.8931245745405038,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks with side steps to the right, then walks forward, then turns around and walks back."
  },
  "004293": {
    "text": "a person confidently walks down an aisle while stretching/loosening up their arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4583806241760426,
      "Minus Multimodal Distance": -7.899809837341309,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.03914034739136696,
      "MoBERT-F": 0.5818233719522414,
      "MoBERT-N": 0.4712569149314977,
      "MoBERT-min(F/N)": 0.4712569149314977,
      "MoBERT-max(F/N)": 0.5818233719522414,
      "MotionCritic": -3.3480710983276367,
      "VeMo (human-opt view)": 0.928416485900217,
      "VeMo (max entropy view)": 0.9283878075651855,
      "VeMo (min entropy view)": 0.928416485900217,
      "VeMo (random view)": 0.9283878075651855,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person confidently walks down an aisle while stretching or loosening up their arm."
  },
  "004297": {
    "text": "a man steps back and shields his face with both of his hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9375551251339498,
      "Minus Multimodal Distance": -4.101123809814453,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.013520403765141964,
      "MoBERT-F": 0.4390658015010026,
      "MoBERT-N": 0.5210995819469596,
      "MoBERT-min(F/N)": 0.4390658015010026,
      "MoBERT-max(F/N)": 0.5210995819469596,
      "MotionCritic": -1.3197613954544067,
      "VeMo (human-opt view)": 0.9241311796377876,
      "VeMo (max entropy view)": 0.8591885441527446,
      "VeMo (min entropy view)": 0.9241311796377876,
      "VeMo (random view)": 0.9241311796377876,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man steps back and shields his face with both of his hands."
  },
  "004299": {
    "text": "a man is picking an object up from the left side and then places it to his right side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4166796619462569,
      "Minus Multimodal Distance": -8.732266426086426,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.1830768421059474e-05,
      "MoBERT-F": 0.4361963947955404,
      "MoBERT-N": 0.5480257132938332,
      "MoBERT-min(F/N)": 0.4361963947955404,
      "MoBERT-max(F/N)": 0.5480257132938332,
      "MotionCritic": -2.815335750579834,
      "VeMo (human-opt view)": 0.34843205574912894,
      "VeMo (max entropy view)": 0.34843205574912894,
      "VeMo (min entropy view)": 0.02931311068980604,
      "VeMo (random view)": 0.02931311068980604,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is picking up an object from the left side and then placing it to his right side."
  },
  "004307": {
    "text": "a person vaults over an obstacle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6614469543223056,
      "Minus Multimodal Distance": -4.636950969696045,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7922059893608093,
      "MoBERT-F": 0.7697611922095704,
      "MoBERT-N": 0.6483819577627081,
      "MoBERT-min(F/N)": 0.6483819577627081,
      "MoBERT-max(F/N)": 0.7697611922095704,
      "MotionCritic": -7.893771648406982,
      "VeMo (human-opt view)": 0.015893526295387,
      "VeMo (max entropy view)": 0.05036572622779519,
      "VeMo (min entropy view)": 0.015893526295387,
      "VeMo (random view)": 0.05036572622779519,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person vaults over an obstacle."
  },
  "004311": {
    "text": "a person stumbles around like they are drunk.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2817383149060813,
      "Minus Multimodal Distance": -6.8950676918029785,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3434915542602539,
      "MoBERT-F": 0.7199804240098491,
      "MoBERT-N": 0.6072281871259746,
      "MoBERT-min(F/N)": 0.6072281871259746,
      "MoBERT-max(F/N)": 0.7199804240098491,
      "MotionCritic": -14.051435470581055,
      "VeMo (human-opt view)": 0.6934306569343066,
      "VeMo (max entropy view)": 0.6934306569343066,
      "VeMo (min entropy view)": 0.8352668213457076,
      "VeMo (random view)": 0.8352668213457076,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stumbles around as if they are drunk."
  },
  "004320": {
    "text": "a man walks forward slowly, then turns around.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8374182395053752,
      "Minus Multimodal Distance": -2.9723892211914062,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.330926145077683e-05,
      "MoBERT-F": 0.33162601211779513,
      "MoBERT-N": 0.5081988553634438,
      "MoBERT-min(F/N)": 0.33162601211779513,
      "MoBERT-max(F/N)": 0.5081988553634438,
      "MotionCritic": -5.679950714111328,
      "VeMo (human-opt view)": 7.170166596878222e-06,
      "VeMo (max entropy view)": 0.013215859030837005,
      "VeMo (min entropy view)": 7.170166596878222e-06,
      "VeMo (random view)": 0.013215859030837005,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man walks forward slowly, then turns around."
  },
  "004331": {
    "text": "someone dusts a picture hanging on the wall with a cloth in their right hand, steadies the picture with their left hand, then finishes dusting it, and finally dusts all the way around the sides of the frame.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6842896779744488,
      "Minus Multimodal Distance": -11.671677589416504,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5669853130239062e-05,
      "MoBERT-F": 0.3652642692493685,
      "MoBERT-N": 0.5421145421052307,
      "MoBERT-min(F/N)": 0.3652642692493685,
      "MoBERT-max(F/N)": 0.5421145421052307,
      "MotionCritic": -1.3790684938430786,
      "VeMo (human-opt view)": 2.1559217248588403e-05,
      "VeMo (max entropy view)": 2.1559217248588403e-05,
      "VeMo (min entropy view)": 2.022635778297828e-05,
      "VeMo (random view)": 2.1559217248588403e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone dusts a picture hanging on the wall with a cloth in their right hand, steadies the picture with their left hand, finishes dusting it, and finally dusts all around the sides of the frame."
  },
  "004336": {
    "text": "a person throws things to the right; first something underhand, then something overhand, and then something very far.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8733892738366094,
      "Minus Multimodal Distance": -8.728947639465332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.030086036771535873,
      "MoBERT-F": 0.6087652625259055,
      "MoBERT-N": 0.46239041940331904,
      "MoBERT-min(F/N)": 0.46239041940331904,
      "MoBERT-max(F/N)": 0.6087652625259055,
      "MotionCritic": -13.149076461791992,
      "VeMo (human-opt view)": 0.2937181663837012,
      "VeMo (max entropy view)": 0.42168674698795183,
      "VeMo (min entropy view)": 0.2937181663837012,
      "VeMo (random view)": 0.42168674698795183,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person throws things to the right: first something underhand, then something overhand, and then something very far."
  },
  "004344": {
    "text": "a person walks straifht forward with head bent forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9272818781555591,
      "Minus Multimodal Distance": -9.331377029418945,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4230772396549582e-05,
      "MoBERT-F": 0.3701878632849956,
      "MoBERT-N": 0.4547629656033544,
      "MoBERT-min(F/N)": 0.3701878632849956,
      "MoBERT-max(F/N)": 0.4547629656033544,
      "MotionCritic": -8.594461441040039,
      "VeMo (human-opt view)": 2.9302089512265962e-05,
      "VeMo (max entropy view)": 0.0018047726209309061,
      "VeMo (min entropy view)": 2.9302089512265962e-05,
      "VeMo (random view)": 0.0018047726209309061,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks straight forward with head bent forward."
  },
  "004346": {
    "text": "a person is stationary moving their arms in symmetrical motion as if they are swimming.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7442711688434351,
      "Minus Multimodal Distance": -5.40872049331665,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.7424073272850364e-05,
      "MoBERT-F": 0.42998563850131166,
      "MoBERT-N": 0.4562303761394861,
      "MoBERT-min(F/N)": 0.42998563850131166,
      "MoBERT-max(F/N)": 0.4562303761394861,
      "MotionCritic": -3.3934009075164795,
      "VeMo (human-opt view)": 0.8736196319018404,
      "VeMo (max entropy view)": 0.8736196319018404,
      "VeMo (min entropy view)": 0.9152129817444219,
      "VeMo (random view)": 0.8736196319018404,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is stationary, moving their arms in a symmetrical motion as if they are swimming."
  },
  "004364": {
    "text": "the person is wiggling his whole body.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8363072940189978,
      "Minus Multimodal Distance": -12.656049728393555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00011884054401889443,
      "MoBERT-F": 0.49262172734724285,
      "MoBERT-N": 0.5480694899292474,
      "MoBERT-min(F/N)": 0.49262172734724285,
      "MoBERT-max(F/N)": 0.5480694899292474,
      "MotionCritic": -8.403303146362305,
      "VeMo (human-opt view)": 0.7547169811320755,
      "VeMo (max entropy view)": 0.7547169811320755,
      "VeMo (min entropy view)": 0.8741463414634146,
      "VeMo (random view)": 0.8741463414634146,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is wiggling his whole body."
  },
  "004366": {
    "text": "the person was laying down on their back.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.1833448355825444,
      "Minus Multimodal Distance": -4.346892833709717,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 9.876093827188015e-05,
      "MoBERT-F": 0.4924119926563592,
      "MoBERT-N": 0.5360684190790503,
      "MoBERT-min(F/N)": 0.4924119926563592,
      "MoBERT-max(F/N)": 0.5360684190790503,
      "MotionCritic": -0.5370969772338867,
      "VeMo (human-opt view)": 0.017978921264724116,
      "VeMo (max entropy view)": 0.21178637200736647,
      "VeMo (min entropy view)": 0.017978921264724116,
      "VeMo (random view)": 0.21178637200736647,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person was lying down on their back."
  },
  "004394": {
    "text": "a person doing a meditation.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8934343412914303,
      "Minus Multimodal Distance": -13.504860877990723,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4553943148930557e-05,
      "MoBERT-F": 0.4714263361283355,
      "MoBERT-N": 0.5955735239998246,
      "MoBERT-min(F/N)": 0.4714263361283355,
      "MoBERT-max(F/N)": 0.5955735239998246,
      "MotionCritic": -6.537637233734131,
      "VeMo (human-opt view)": 0.00014852133045701926,
      "VeMo (max entropy view)": 0.0005865102639296188,
      "VeMo (min entropy view)": 0.00014852133045701926,
      "VeMo (random view)": 0.0005865102639296188,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is doing meditation."
  },
  "004398": {
    "text": "a man takes sideways steps to his right, then immediately takes sideways steps to his left back towards his original position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9450005840025568,
      "Minus Multimodal Distance": -8.950249671936035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.820644269580953e-05,
      "MoBERT-F": 0.4371635336855607,
      "MoBERT-N": 0.6418708931997764,
      "MoBERT-min(F/N)": 0.4371635336855607,
      "MoBERT-max(F/N)": 0.6418708931997764,
      "MotionCritic": -6.597362041473389,
      "VeMo (human-opt view)": 0.9687795648060549,
      "VeMo (max entropy view)": 0.9097651421508035,
      "VeMo (min entropy view)": 0.9687795648060549,
      "VeMo (random view)": 0.9687795648060549,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man takes sideways steps to his right, then immediately takes sideways steps to his left, back towards his original position."
  },
  "004424": {
    "text": "someone steps back with their right foot and then sits down while placing his hands on his knees with elbows out.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8490131764531359,
      "Minus Multimodal Distance": -8.4713773727417,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4951947125373408e-05,
      "MoBERT-F": 0.3978203415898228,
      "MoBERT-N": 0.4635174102283065,
      "MoBERT-min(F/N)": 0.3978203415898228,
      "MoBERT-max(F/N)": 0.4635174102283065,
      "MotionCritic": -14.098531723022461,
      "VeMo (human-opt view)": 0.8990825688073395,
      "VeMo (max entropy view)": 0.7058823529411765,
      "VeMo (min entropy view)": 0.8990825688073395,
      "VeMo (random view)": 0.7058823529411765,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone steps back with their right foot, then sits down while placing their hands on their knees, with elbows out."
  },
  "004438": {
    "text": "a person walks forward with their left leg limping.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8689543608808757,
      "Minus Multimodal Distance": -12.588052749633789,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.059190478175878525,
      "MoBERT-F": 0.8073844223907025,
      "MoBERT-N": 0.7080344359054004,
      "MoBERT-min(F/N)": 0.7080344359054004,
      "MoBERT-max(F/N)": 0.8073844223907025,
      "MotionCritic": -5.322739601135254,
      "VeMo (human-opt view)": 0.8809411764705882,
      "VeMo (max entropy view)": 0.7426470588235294,
      "VeMo (min entropy view)": 0.8809411764705882,
      "VeMo (random view)": 0.7426470588235294,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, limping with their left leg."
  },
  "004439": {
    "text": "a person walks from the left hand side to the right hand side in a waving path and then stops while facing towards the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8341675028784479,
      "Minus Multimodal Distance": -8.99470043182373,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.385485979379155e-05,
      "MoBERT-F": 0.46853186240243616,
      "MoBERT-N": 0.5601783008057957,
      "MoBERT-min(F/N)": 0.46853186240243616,
      "MoBERT-max(F/N)": 0.5601783008057957,
      "MotionCritic": -8.624624252319336,
      "VeMo (human-opt view)": 0.7544783983140148,
      "VeMo (max entropy view)": 0.7544783983140148,
      "VeMo (min entropy view)": 0.7770582793709528,
      "VeMo (random view)": 0.7770582793709528,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks from the left - hand side to the right - hand side in a wavy path and then stops while facing right."
  },
  "004444": {
    "text": "the person picks something up, tilts it then puts it back down",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.49718068143781874,
      "Minus Multimodal Distance": -9.042518615722656,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3333635908784345e-05,
      "MoBERT-F": 0.4449642048636218,
      "MoBERT-N": 0.5211816890653371,
      "MoBERT-min(F/N)": 0.4449642048636218,
      "MoBERT-max(F/N)": 0.5211816890653371,
      "MotionCritic": -2.77190899848938,
      "VeMo (human-opt view)": 0.08052121617106582,
      "VeMo (max entropy view)": 0.08052121617106582,
      "VeMo (min entropy view)": 0.022971794126199478,
      "VeMo (random view)": 0.022971794126199478,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person picks something up, tilts it, then puts it back down."
  },
  "004458": {
    "text": "person moves forward and extends the right leg and kicks something to the front",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2566316744007624,
      "Minus Multimodal Distance": -9.571741104125977,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3346481611952186e-05,
      "MoBERT-F": 0.4108952238081488,
      "MoBERT-N": 0.5633288047255959,
      "MoBERT-min(F/N)": 0.4108952238081488,
      "MoBERT-max(F/N)": 0.5633288047255959,
      "MotionCritic": -2.586367130279541,
      "VeMo (human-opt view)": 0.8871595330739299,
      "VeMo (max entropy view)": 0.8077858880778589,
      "VeMo (min entropy view)": 0.8871595330739299,
      "VeMo (random view)": 0.8077858880778589,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person moves forward, extends the right leg, and kicks something forward."
  },
  "004472": {
    "text": "a man dodges something to the left, then the right and then the left again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.010902322854779,
      "Minus Multimodal Distance": -3.610997200012207,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.005039414390921593,
      "MoBERT-F": 0.6410467023373077,
      "MoBERT-N": 0.7102695029416257,
      "MoBERT-min(F/N)": 0.6410467023373077,
      "MoBERT-max(F/N)": 0.7102695029416257,
      "MotionCritic": -9.14474868774414,
      "VeMo (human-opt view)": 0.8355739400206825,
      "VeMo (max entropy view)": 0.8355739400206825,
      "VeMo (min entropy view)": 0.8806431663574521,
      "VeMo (random view)": 0.8806431663574521,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man dodges something to the left, then to the right, and then to the left again."
  },
  "004473": {
    "text": "a person walks forward, spins on their foot, and walks back",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3943270538285628,
      "Minus Multimodal Distance": -4.70902681350708,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0007437616586685181,
      "MoBERT-F": 0.5434665181232288,
      "MoBERT-N": 0.6825610286990782,
      "MoBERT-min(F/N)": 0.5434665181232288,
      "MoBERT-max(F/N)": 0.6825610286990782,
      "MotionCritic": -14.923591613769531,
      "VeMo (human-opt view)": 0.6220095693779905,
      "VeMo (max entropy view)": 0.578544061302682,
      "VeMo (min entropy view)": 0.6220095693779905,
      "VeMo (random view)": 0.578544061302682,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, spins on their foot, and walks back."
  },
  "004495": {
    "text": "a person turns to his left and looks around.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9721543735505662,
      "Minus Multimodal Distance": -7.617582321166992,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.433705569477752e-05,
      "MoBERT-F": 0.4577111625077977,
      "MoBERT-N": 0.5735033612890097,
      "MoBERT-min(F/N)": 0.4577111625077977,
      "MoBERT-max(F/N)": 0.5735033612890097,
      "MotionCritic": -3.1431288719177246,
      "VeMo (human-opt view)": 0.8872638634978671,
      "VeMo (max entropy view)": 0.8267782426778243,
      "VeMo (min entropy view)": 0.8872638634978671,
      "VeMo (random view)": 0.8872638634978671,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person turns to his left and looks around."
  },
  "004499": {
    "text": "a man is doing jumping jacks.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8837371527414332,
      "Minus Multimodal Distance": -9.641637802124023,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.011402283795177937,
      "MoBERT-F": 0.6145731812699984,
      "MoBERT-N": 0.5881266786877126,
      "MoBERT-min(F/N)": 0.5881266786877126,
      "MoBERT-max(F/N)": 0.6145731812699984,
      "MotionCritic": -7.036024570465088,
      "VeMo (human-opt view)": 0.9193934557063048,
      "VeMo (max entropy view)": 0.8869752421959096,
      "VeMo (min entropy view)": 0.9193934557063048,
      "VeMo (random view)": 0.9193934557063048,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is doing jumping jacks."
  },
  "004517": {
    "text": "a person standing points at something by lifting their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8091110628018959,
      "Minus Multimodal Distance": -3.495086431503296,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2840584279038012e-05,
      "MoBERT-F": 0.32880531011253483,
      "MoBERT-N": 0.41370067839408725,
      "MoBERT-min(F/N)": 0.32880531011253483,
      "MoBERT-max(F/N)": 0.41370067839408725,
      "MotionCritic": -6.014085292816162,
      "VeMo (human-opt view)": 0.8877146631439894,
      "VeMo (max entropy view)": 0.7665647298674821,
      "VeMo (min entropy view)": 0.8877146631439894,
      "VeMo (random view)": 0.8877146631439894,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person standing points at something by lifting their right hand."
  },
  "004521": {
    "text": "a person is making rapid swinging motions with their right leg in the air, while holding onto something with their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.659743529062911,
      "Minus Multimodal Distance": -7.138694763183594,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 8.910335600376129e-05,
      "MoBERT-F": 0.5827060076600254,
      "MoBERT-N": 0.44788721385260194,
      "MoBERT-min(F/N)": 0.44788721385260194,
      "MoBERT-max(F/N)": 0.5827060076600254,
      "MotionCritic": -14.865808486938477,
      "VeMo (human-opt view)": 0.1928020565552699,
      "VeMo (max entropy view)": 0.2231404958677686,
      "VeMo (min entropy view)": 0.1928020565552699,
      "VeMo (random view)": 0.1928020565552699,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is making rapid swinging motions with their right leg in the air while holding onto something with their right hand."
  },
  "004553": {
    "text": "a person walks forward, picks something up, puts their hand to their mouth and tilts back their head, then puts it down",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.28937190135584,
      "Minus Multimodal Distance": -4.271160125732422,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8176780688227154e-05,
      "MoBERT-F": 0.4103509394414266,
      "MoBERT-N": 0.4421081760278547,
      "MoBERT-min(F/N)": 0.4103509394414266,
      "MoBERT-max(F/N)": 0.4421081760278547,
      "MotionCritic": -9.507253646850586,
      "VeMo (human-opt view)": 0.010353254832259498,
      "VeMo (max entropy view)": 0.7310704960835509,
      "VeMo (min entropy view)": 0.010353254832259498,
      "VeMo (random view)": 0.010353254832259498,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, picks something up, puts their hand to their mouth, tilts their head back, and then puts it down."
  },
  "004556": {
    "text": "someone walks forward and to the right, then stands looking straight ahead.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.906804651003644,
      "Minus Multimodal Distance": -11.089889526367188,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3610456992173567e-05,
      "MoBERT-F": 0.4358663928504971,
      "MoBERT-N": 0.5296416728875208,
      "MoBERT-min(F/N)": 0.4358663928504971,
      "MoBERT-max(F/N)": 0.5296416728875208,
      "MotionCritic": -3.039055824279785,
      "VeMo (human-opt view)": 0.9241379310344827,
      "VeMo (max entropy view)": 0.9241379310344827,
      "VeMo (min entropy view)": 0.9497231450719823,
      "VeMo (random view)": 0.9497231450719823,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone walks forward and to the right, then stands looking straight ahead."
  },
  "004601": {
    "text": "someone is climbing a ladder,  they walk up 3 steps and then back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.104747088839185,
      "Minus Multimodal Distance": -2.7288448810577393,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00013223325368016958,
      "MoBERT-F": 0.6106760525022003,
      "MoBERT-N": 0.5915231696570269,
      "MoBERT-min(F/N)": 0.5915231696570269,
      "MoBERT-max(F/N)": 0.6106760525022003,
      "MotionCritic": -14.537553787231445,
      "VeMo (human-opt view)": 4.120675137634437e-05,
      "VeMo (max entropy view)": 4.120675137634437e-05,
      "VeMo (min entropy view)": 1.1881702765572629e-05,
      "VeMo (random view)": 1.1881702765572629e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is climbing a ladder. They walk up 3 steps and then walk back down."
  },
  "004602": {
    "text": "person appears to be running in straight line then jumps over something and continues running.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.437868098869605,
      "Minus Multimodal Distance": -8.114094734191895,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.489213850116357e-05,
      "MoBERT-F": 0.42394908211447296,
      "MoBERT-N": 0.5624887948995283,
      "MoBERT-min(F/N)": 0.42394908211447296,
      "MoBERT-max(F/N)": 0.5624887948995283,
      "MotionCritic": -10.647412300109863,
      "VeMo (human-opt view)": 0.8175473579262214,
      "VeMo (max entropy view)": 0.5300353356890459,
      "VeMo (min entropy view)": 0.8175473579262214,
      "VeMo (random view)": 0.5300353356890459,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person appears to be running in a straight line, then jumps over something and continues running."
  },
  "004615": {
    "text": "someone is sliding an object to the left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3402554215364975,
      "Minus Multimodal Distance": -7.582949161529541,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2766884285374545e-05,
      "MoBERT-F": 0.4807488293508212,
      "MoBERT-N": 0.5334409275922329,
      "MoBERT-min(F/N)": 0.4807488293508212,
      "MoBERT-max(F/N)": 0.5334409275922329,
      "MotionCritic": -6.343027591705322,
      "VeMo (human-opt view)": 0.002812348472603847,
      "VeMo (max entropy view)": 0.002812348472603847,
      "VeMo (min entropy view)": 9.655375949369395e-05,
      "VeMo (random view)": 9.655375949369395e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone is sliding an object to the left."
  },
  "004621": {
    "text": "slowly swinging the arms forward as the body walks slowly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.956667759272418,
      "Minus Multimodal Distance": -5.12434720993042,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.5999961002962664e-05,
      "MoBERT-F": 0.3196783424183497,
      "MoBERT-N": 0.4145446390603555,
      "MoBERT-min(F/N)": 0.3196783424183497,
      "MoBERT-max(F/N)": 0.4145446390603555,
      "MotionCritic": -6.047903060913086,
      "VeMo (human-opt view)": 0.8668866886688669,
      "VeMo (max entropy view)": 0.8668866886688669,
      "VeMo (min entropy view)": 0.8934945308002303,
      "VeMo (random view)": 0.8934945308002303,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly swings the arms forward as the body walks slowly."
  },
  "004668": {
    "text": "person is hunched over creeping diagonally down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0287113308725122,
      "Minus Multimodal Distance": -11.836894989013672,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.26561495661735535,
      "MoBERT-F": 0.7214412857804464,
      "MoBERT-N": 0.6021982958034424,
      "MoBERT-min(F/N)": 0.6021982958034424,
      "MoBERT-max(F/N)": 0.7214412857804464,
      "MotionCritic": -5.384260177612305,
      "VeMo (human-opt view)": 0.9687916777807415,
      "VeMo (max entropy view)": 0.8263888888888888,
      "VeMo (min entropy view)": 0.9687916777807415,
      "VeMo (random view)": 0.8263888888888888,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is hunched over, creeping diagonally down."
  },
  "004679": {
    "text": "a person takes their hands from their side places them above their shoulders and then places them back at their side",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9466544416058035,
      "Minus Multimodal Distance": -2.847898483276367,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.1486142688663676e-05,
      "MoBERT-F": 0.3832288213278986,
      "MoBERT-N": 0.3881882943766691,
      "MoBERT-min(F/N)": 0.3832288213278986,
      "MoBERT-max(F/N)": 0.3881882943766691,
      "MotionCritic": -4.663939476013184,
      "VeMo (human-opt view)": 0.9820627802690582,
      "VeMo (max entropy view)": 0.9820627802690582,
      "VeMo (min entropy view)": 0.9850860420650096,
      "VeMo (random view)": 0.9850860420650096,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes their hands from their sides, places them above their shoulders, and then places them back at their sides."
  },
  "004698": {
    "text": "a man walks forwards at medium pace with his arms swawing gently.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.115976444550831,
      "Minus Multimodal Distance": -7.03915548324585,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.18999682366847992,
      "MoBERT-F": 0.6772758947542903,
      "MoBERT-N": 0.7207729132335516,
      "MoBERT-min(F/N)": 0.6772758947542903,
      "MoBERT-max(F/N)": 0.7207729132335516,
      "MotionCritic": -6.710999488830566,
      "VeMo (human-opt view)": 0.8075117370892019,
      "VeMo (max entropy view)": 0.6919431279620853,
      "VeMo (min entropy view)": 0.8075117370892019,
      "VeMo (random view)": 0.8075117370892019,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward at a medium pace with his arms swinging gently."
  },
  "004707": {
    "text": "the figure curls its arms outwards from its chest, it lowers its arms in the motion towards its groin and then raises them upwards at head level.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8187204267252869,
      "Minus Multimodal Distance": -7.866647720336914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.4331896788207814e-05,
      "MoBERT-F": 0.37397451013293903,
      "MoBERT-N": 0.5147025223343311,
      "MoBERT-min(F/N)": 0.37397451013293903,
      "MoBERT-max(F/N)": 0.5147025223343311,
      "MotionCritic": -7.559981822967529,
      "VeMo (human-opt view)": 0.8181818181818182,
      "VeMo (max entropy view)": 0.8181818181818182,
      "VeMo (min entropy view)": 0.8522727272727273,
      "VeMo (random view)": 0.8522727272727273,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person curls their arms outwards from their chest. They lower their arms in a motion towards their groin and then raise them upwards to head level."
  },
  "004708": {
    "text": "the person steps a little wider than shoulder width apart first with their right foot, then with their left before squatting 4 times.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.009635831874898,
      "Minus Multimodal Distance": -9.832185745239258,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.7650232115993276e-05,
      "MoBERT-F": 0.3965012448431805,
      "MoBERT-N": 0.5218153554340309,
      "MoBERT-min(F/N)": 0.3965012448431805,
      "MoBERT-max(F/N)": 0.5218153554340309,
      "MotionCritic": -7.224918365478516,
      "VeMo (human-opt view)": 0.8670849971477467,
      "VeMo (max entropy view)": 0.844,
      "VeMo (min entropy view)": 0.8670849971477467,
      "VeMo (random view)": 0.8670849971477467,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person steps a little wider than shoulder - width apart, first with their right foot, then with their left, before squatting 4 times."
  },
  "004719": {
    "text": "a person runs forward and then to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.9544941754089216,
      "Minus Multimodal Distance": -11.283429145812988,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.4962926040170714e-05,
      "MoBERT-F": 0.4812414934539815,
      "MoBERT-N": 0.6194531082142081,
      "MoBERT-min(F/N)": 0.4812414934539815,
      "MoBERT-max(F/N)": 0.6194531082142081,
      "MotionCritic": -11.806121826171875,
      "VeMo (human-opt view)": 0.9241379310344827,
      "VeMo (max entropy view)": 0.9049773755656109,
      "VeMo (min entropy view)": 0.9241379310344827,
      "VeMo (random view)": 0.9241379310344827,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs forward and then to the right."
  },
  "004724": {
    "text": "a person shifts around in place like a zombie, raising their arms up and down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0652516351657653,
      "Minus Multimodal Distance": -7.934060573577881,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0002035608486039564,
      "MoBERT-F": 0.5402583984292124,
      "MoBERT-N": 0.4881326438492291,
      "MoBERT-min(F/N)": 0.4881326438492291,
      "MoBERT-max(F/N)": 0.5402583984292124,
      "MotionCritic": -6.318368434906006,
      "VeMo (human-opt view)": 0.887055183084064,
      "VeMo (max entropy view)": 0.887055183084064,
      "VeMo (min entropy view)": 0.8934945308002303,
      "VeMo (random view)": 0.8934945308002303,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person shuffles around in place like a zombie, raising their arms up and down."
  },
  "004734": {
    "text": "a person walks forwards casually.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3974119779729877,
      "Minus Multimodal Distance": -11.333667755126953,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.013742450624704361,
      "MoBERT-F": 0.7008617437462896,
      "MoBERT-N": 0.7753310831307015,
      "MoBERT-min(F/N)": 0.7008617437462896,
      "MoBERT-max(F/N)": 0.7753310831307015,
      "MotionCritic": -4.37025785446167,
      "VeMo (human-opt view)": 0.9858971577348665,
      "VeMo (max entropy view)": 0.9554140127388535,
      "VeMo (min entropy view)": 0.9858971577348665,
      "VeMo (random view)": 0.9554140127388535,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward casually."
  },
  "004759": {
    "text": "a person is walking in a stumbling motion and puts up one of his hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5702605168608228,
      "Minus Multimodal Distance": -8.983368873596191,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00033747375709936023,
      "MoBERT-F": 0.46077889192251736,
      "MoBERT-N": 0.48525759299524984,
      "MoBERT-min(F/N)": 0.46077889192251736,
      "MoBERT-max(F/N)": 0.48525759299524984,
      "MotionCritic": -13.09273910522461,
      "VeMo (human-opt view)": 0.03749662800107904,
      "VeMo (max entropy view)": 0.42327150084317033,
      "VeMo (min entropy view)": 0.03749662800107904,
      "VeMo (random view)": 0.03749662800107904,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking in a stumbling motion and puts up one of his hands."
  },
  "004776": {
    "text": "a man moving like a chicken.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9459055373726614,
      "Minus Multimodal Distance": -13.974383354187012,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2291649656835943e-05,
      "MoBERT-F": 0.3994902109970353,
      "MoBERT-N": 0.5208054735586729,
      "MoBERT-min(F/N)": 0.3994902109970353,
      "MoBERT-max(F/N)": 0.5208054735586729,
      "MotionCritic": -5.464378833770752,
      "VeMo (human-opt view)": 0.4518950437317784,
      "VeMo (max entropy view)": 0.4518950437317784,
      "VeMo (min entropy view)": 0.7661016949152543,
      "VeMo (random view)": 0.7661016949152543,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is moving like a chicken."
  },
  "004779": {
    "text": "a person limping with right leg hurt and going around in a circle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.0285254252539957,
      "Minus Multimodal Distance": -7.343600273132324,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.07039130479097366,
      "MoBERT-F": 0.6262269451981703,
      "MoBERT-N": 0.5809398372362626,
      "MoBERT-min(F/N)": 0.5809398372362626,
      "MoBERT-max(F/N)": 0.6262269451981703,
      "MotionCritic": -15.756431579589844,
      "VeMo (human-opt view)": 0.9149828440716736,
      "VeMo (max entropy view)": 0.9045362220717671,
      "VeMo (min entropy view)": 0.9149828440716736,
      "VeMo (random view)": 0.9045362220717671,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is limping with their right leg hurt and walking around in a circle."
  },
  "004807": {
    "text": "a person on a swivel chair moved from front to left typing and talking on the phone motions.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.513460137363872,
      "Minus Multimodal Distance": -9.715764999389648,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2988788259681314e-05,
      "MoBERT-F": 0.41965248551454426,
      "MoBERT-N": 0.3944999790462496,
      "MoBERT-min(F/N)": 0.3944999790462496,
      "MoBERT-max(F/N)": 0.41965248551454426,
      "MotionCritic": -7.557065963745117,
      "VeMo (human-opt view)": 4.015309572337408e-05,
      "VeMo (max entropy view)": 4.015309572337408e-05,
      "VeMo (min entropy view)": 2.3519363725405016e-05,
      "VeMo (random view)": 4.015309572337408e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person on a swivel chair moved from the front to the left, typing and talking on the phone simultaneously."
  },
  "004808": {
    "text": "a person walks, speeds up, and jumps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.588392557376978,
      "Minus Multimodal Distance": -6.7232666015625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.012822686694562435,
      "MoBERT-F": 0.6166712089154127,
      "MoBERT-N": 0.642754995202569,
      "MoBERT-min(F/N)": 0.6166712089154127,
      "MoBERT-max(F/N)": 0.642754995202569,
      "MotionCritic": -10.03769302368164,
      "VeMo (human-opt view)": 0.7665056360708534,
      "VeMo (max entropy view)": 0.6234817813765182,
      "VeMo (min entropy view)": 0.7665056360708534,
      "VeMo (random view)": 0.7665056360708534,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks, speeds up, and jumps."
  },
  "004811": {
    "text": "person walks forward then turns to their right and carry on walking then stop",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4060201487164659,
      "Minus Multimodal Distance": -11.492966651916504,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2642987460130826e-05,
      "MoBERT-F": 0.4180612988578465,
      "MoBERT-N": 0.6365070997189355,
      "MoBERT-min(F/N)": 0.4180612988578465,
      "MoBERT-max(F/N)": 0.6365070997189355,
      "MotionCritic": -5.413445949554443,
      "VeMo (human-opt view)": 0.8669527896995708,
      "VeMo (max entropy view)": 0.843846949327818,
      "VeMo (min entropy view)": 0.8669527896995708,
      "VeMo (random view)": 0.8669527896995708,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, then turns to their right and carries on walking, then stops."
  },
  "004817": {
    "text": "stick man walking straightforward in wobbling position maybe taking a dui test.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4719580195447532,
      "Minus Multimodal Distance": -7.983205318450928,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0013002007035538554,
      "MoBERT-F": 0.6492912366410902,
      "MoBERT-N": 0.6700684228984364,
      "MoBERT-min(F/N)": 0.6492912366410902,
      "MoBERT-max(F/N)": 0.6700684228984364,
      "MotionCritic": -11.317390441894531,
      "VeMo (human-opt view)": 0.8736532810969637,
      "VeMo (max entropy view)": 0.8736532810969637,
      "VeMo (min entropy view)": 0.9323692045937899,
      "VeMo (random view)": 0.8736532810969637,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking straightforward in a wobbling position, perhaps taking a DUI test."
  },
  "004818": {
    "text": "the man walk forward and move the right hand to the right side and went back.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.432160061768554,
      "Minus Multimodal Distance": -8.612695693969727,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.24738594889640808,
      "MoBERT-F": 0.5968797366123615,
      "MoBERT-N": 0.554456516929101,
      "MoBERT-min(F/N)": 0.554456516929101,
      "MoBERT-max(F/N)": 0.5968797366123615,
      "MotionCritic": -2.122791290283203,
      "VeMo (human-opt view)": 0.777120315581854,
      "VeMo (max entropy view)": 0.777120315581854,
      "VeMo (min entropy view)": 0.7977736549165121,
      "VeMo (random view)": 0.7977736549165121,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man walked forward, moved his right hand to the right side, and then went back."
  },
  "004819": {
    "text": "a person is making a high kick with his left leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0607362401155047,
      "Minus Multimodal Distance": -12.5350341796875,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0020263311453163624,
      "MoBERT-F": 0.6766550068885304,
      "MoBERT-N": 0.6484870159686664,
      "MoBERT-min(F/N)": 0.6484870159686664,
      "MoBERT-max(F/N)": 0.6766550068885304,
      "MotionCritic": -5.663817882537842,
      "VeMo (human-opt view)": 0.9324441867207887,
      "VeMo (max entropy view)": 0.9324441867207887,
      "VeMo (min entropy view)": 0.946712802768166,
      "VeMo (random view)": 0.9324441867207887,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is making a high kick with their left leg."
  },
  "004841": {
    "text": "a person struggling to push arms forward and upwards, arms shaking.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.4898114533775795,
      "Minus Multimodal Distance": -5.455386638641357,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7437079552328214e-05,
      "MoBERT-F": 0.4161941283294165,
      "MoBERT-N": 0.4411079962049635,
      "MoBERT-min(F/N)": 0.4161941283294165,
      "MoBERT-max(F/N)": 0.4411079962049635,
      "MotionCritic": -9.415566444396973,
      "VeMo (human-opt view)": 0.8442437923250564,
      "VeMo (max entropy view)": 0.4697986577181208,
      "VeMo (min entropy view)": 0.8442437923250564,
      "VeMo (random view)": 0.8442437923250564,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is struggling to push their arms forward and upwards, with their arms shaking."
  },
  "004854": {
    "text": "the person is walking around.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5255682405821602,
      "Minus Multimodal Distance": -4.092678070068359,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9805895090103149,
      "MoBERT-F": 0.7097216233335819,
      "MoBERT-N": 0.5180636328423687,
      "MoBERT-min(F/N)": 0.5180636328423687,
      "MoBERT-max(F/N)": 0.7097216233335819,
      "MotionCritic": -1.9673761129379272,
      "VeMo (human-opt view)": 0.9754508435136707,
      "VeMo (max entropy view)": 0.9754508435136707,
      "VeMo (min entropy view)": 0.9797054389423634,
      "VeMo (random view)": 0.9797054389423634,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking around."
  },
  "004863": {
    "text": "a person walks and then puts both arms out while bowing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5238966536328864,
      "Minus Multimodal Distance": -11.579060554504395,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.16968181729316711,
      "MoBERT-F": 0.5975504153931113,
      "MoBERT-N": 0.5484808623463897,
      "MoBERT-min(F/N)": 0.5484808623463897,
      "MoBERT-max(F/N)": 0.5975504153931113,
      "MotionCritic": -13.036855697631836,
      "VeMo (human-opt view)": 0.01694915254237288,
      "VeMo (max entropy view)": 0.04212020823473734,
      "VeMo (min entropy view)": 0.01694915254237288,
      "VeMo (random view)": 0.01694915254237288,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks and then puts both arms out while bowing."
  },
  "004864": {
    "text": "a person is in a fighting stance with their legs spread and fists raised. they hop forward and kick out with their left foot before returning to their original position.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0194955876541256,
      "Minus Multimodal Distance": -8.698877334594727,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7201154828071594,
      "MoBERT-F": 0.6464661049005032,
      "MoBERT-N": 0.6080238557640891,
      "MoBERT-min(F/N)": 0.6080238557640891,
      "MoBERT-max(F/N)": 0.6464661049005032,
      "MotionCritic": -9.387928009033203,
      "VeMo (human-opt view)": 0.928416485900217,
      "VeMo (max entropy view)": 0.5480769230769231,
      "VeMo (min entropy view)": 0.928416485900217,
      "VeMo (random view)": 0.5480769230769231,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is in a fighting stance, with their legs spread and fists raised. They hop forward, kick out with their left foot, and then return to their original position."
  },
  "004881": {
    "text": "character lowers left hand then waves right hand in the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9457469827133838,
      "Minus Multimodal Distance": -11.39924430847168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.920729275501799e-05,
      "MoBERT-F": 0.3985559571818875,
      "MoBERT-N": 0.467420693987442,
      "MoBERT-min(F/N)": 0.3985559571818875,
      "MoBERT-max(F/N)": 0.467420693987442,
      "MotionCritic": -3.942561388015747,
      "VeMo (human-opt view)": 0.7316017316017316,
      "VeMo (max entropy view)": 0.7184466019417476,
      "VeMo (min entropy view)": 0.7316017316017316,
      "VeMo (random view)": 0.7316017316017316,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The character lowers their left hand and then waves their right hand in the air."
  },
  "004947": {
    "text": "a person sits down then uses left arm to push up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3339869882254,
      "Minus Multimodal Distance": -10.402066230773926,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.1950439733918756e-05,
      "MoBERT-F": 0.4504311531496862,
      "MoBERT-N": 0.40639129559447174,
      "MoBERT-min(F/N)": 0.40639129559447174,
      "MoBERT-max(F/N)": 0.4504311531496862,
      "MotionCritic": -17.505634307861328,
      "VeMo (human-opt view)": 0.8074245939675174,
      "VeMo (max entropy view)": 0.743142144638404,
      "VeMo (min entropy view)": 0.8074245939675174,
      "VeMo (random view)": 0.743142144638404,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down and then uses the left arm to push up."
  },
  "004952": {
    "text": "a person does a jumping jack in place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4611140030120455,
      "Minus Multimodal Distance": -8.327152252197266,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00041299453005194664,
      "MoBERT-F": 0.54556144638886,
      "MoBERT-N": 0.6625166006231721,
      "MoBERT-min(F/N)": 0.54556144638886,
      "MoBERT-max(F/N)": 0.6625166006231721,
      "MotionCritic": -8.639348030090332,
      "VeMo (human-opt view)": 0.964964964964965,
      "VeMo (max entropy view)": 0.96040016673614,
      "VeMo (min entropy view)": 0.964964964964965,
      "VeMo (random view)": 0.964964964964965,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person does a jumping jack in place."
  },
  "004960": {
    "text": "person is adjusting something on their head",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7811109532849918,
      "Minus Multimodal Distance": -9.016196250915527,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.05723172798752785,
      "MoBERT-F": 0.7213963408024543,
      "MoBERT-N": 0.677406512202324,
      "MoBERT-min(F/N)": 0.677406512202324,
      "MoBERT-max(F/N)": 0.7213963408024543,
      "MotionCritic": -5.715061664581299,
      "VeMo (human-opt view)": 0.9859269126078647,
      "VeMo (max entropy view)": 0.9859269126078647,
      "VeMo (min entropy view)": 0.9875715682167295,
      "VeMo (random view)": 0.9859269126078647,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is adjusting something on their head."
  },
  "004968": {
    "text": "a person throws and object and then catches another object",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8584231696864225,
      "Minus Multimodal Distance": -7.1285905838012695,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8415500521659851,
      "MoBERT-F": 0.669393292890919,
      "MoBERT-N": 0.5687090600763488,
      "MoBERT-min(F/N)": 0.5687090600763488,
      "MoBERT-max(F/N)": 0.669393292890919,
      "MotionCritic": -6.408373832702637,
      "VeMo (human-opt view)": 0.0002029190940985834,
      "VeMo (max entropy view)": 0.00090915138479278,
      "VeMo (min entropy view)": 0.0002029190940985834,
      "VeMo (random view)": 0.0002029190940985834,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws an object and then catches another object."
  },
  "004973": {
    "text": "a person does one squat with arms straight out in front.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5534398957560981,
      "Minus Multimodal Distance": -7.570670127868652,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.4540018532425165e-05,
      "MoBERT-F": 0.3077961555051405,
      "MoBERT-N": 0.47391431148261215,
      "MoBERT-min(F/N)": 0.3077961555051405,
      "MoBERT-max(F/N)": 0.47391431148261215,
      "MotionCritic": -4.620439529418945,
      "VeMo (human-opt view)": 0.8356435643564356,
      "VeMo (max entropy view)": 0.5941043083900227,
      "VeMo (min entropy view)": 0.8356435643564356,
      "VeMo (random view)": 0.5941043083900227,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person does one squat with their arms straight out in front."
  },
  "004977": {
    "text": "a person is walking like a mummy.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5258726113678358,
      "Minus Multimodal Distance": -6.146442413330078,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.37588241696357727,
      "MoBERT-F": 0.7889278180122452,
      "MoBERT-N": 0.7005147830198868,
      "MoBERT-min(F/N)": 0.7005147830198868,
      "MoBERT-max(F/N)": 0.7889278180122452,
      "MotionCritic": -3.8624773025512695,
      "VeMo (human-opt view)": 0.7658862876254181,
      "VeMo (max entropy view)": 0.7658862876254181,
      "VeMo (min entropy view)": 0.7878787878787878,
      "VeMo (random view)": 0.7878787878787878,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking like a mummy."
  },
  "004991": {
    "text": "a person flaps their arms like a chicken.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6819825976833374,
      "Minus Multimodal Distance": -4.628737449645996,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2651647668681107e-05,
      "MoBERT-F": 0.3595597404809891,
      "MoBERT-N": 0.5016698755705316,
      "MoBERT-min(F/N)": 0.3595597404809891,
      "MoBERT-max(F/N)": 0.5016698755705316,
      "MotionCritic": -3.6751110553741455,
      "VeMo (human-opt view)": 0.777947932618683,
      "VeMo (max entropy view)": 0.777947932618683,
      "VeMo (min entropy view)": 0.9285389167045972,
      "VeMo (random view)": 0.777947932618683,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person flaps their arms like a chicken."
  },
  "004996": {
    "text": "the person is sat down and their arms are shaking",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1849696984806377,
      "Minus Multimodal Distance": -9.479848861694336,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3888947907835245e-05,
      "MoBERT-F": 0.3442041958286187,
      "MoBERT-N": 0.47837350407346696,
      "MoBERT-min(F/N)": 0.3442041958286187,
      "MoBERT-max(F/N)": 0.47837350407346696,
      "MotionCritic": -10.628973007202148,
      "VeMo (human-opt view)": 3.3853259397594626e-06,
      "VeMo (max entropy view)": 5.251373853528483e-06,
      "VeMo (min entropy view)": 3.3853259397594626e-06,
      "VeMo (random view)": 3.3853259397594626e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is sitting down and their arms are shaking."
  },
  "004997": {
    "text": "a person holds their head with both hands then bats away something with their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6442907918060871,
      "Minus Multimodal Distance": -11.341856956481934,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5332044970127754e-05,
      "MoBERT-F": 0.34401453552418115,
      "MoBERT-N": 0.3941995258183728,
      "MoBERT-min(F/N)": 0.34401453552418115,
      "MoBERT-max(F/N)": 0.3941995258183728,
      "MotionCritic": -8.270830154418945,
      "VeMo (human-opt view)": 0.994797869006581,
      "VeMo (max entropy view)": 0.9932885906040269,
      "VeMo (min entropy view)": 0.994797869006581,
      "VeMo (random view)": 0.9932885906040269,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person holds their head with both hands, then bats away something with their right hand."
  },
  "005031": {
    "text": "a person does a drinking motion with his right arm",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7103042173935441,
      "Minus Multimodal Distance": -11.229196548461914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9267017453094013e-05,
      "MoBERT-F": 0.38754149616443356,
      "MoBERT-N": 0.4600818075493626,
      "MoBERT-min(F/N)": 0.38754149616443356,
      "MoBERT-max(F/N)": 0.4600818075493626,
      "MotionCritic": -5.298515319824219,
      "VeMo (human-opt view)": 0.026072150686389274,
      "VeMo (max entropy view)": 0.026072150686389274,
      "VeMo (min entropy view)": 0.007582235904452426,
      "VeMo (random view)": 0.026072150686389274,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person makes a drinking motion with his right arm."
  },
  "005037": {
    "text": "the drunk guy struggles to walk down the street",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9835099624059285,
      "Minus Multimodal Distance": -4.273891448974609,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7683250308036804,
      "MoBERT-F": 0.6588716835306792,
      "MoBERT-N": 0.7147029633865886,
      "MoBERT-min(F/N)": 0.6588716835306792,
      "MoBERT-max(F/N)": 0.7147029633865886,
      "MotionCritic": -2.019216299057007,
      "VeMo (human-opt view)": 0.8176100628930818,
      "VeMo (max entropy view)": 0.7988165680473372,
      "VeMo (min entropy view)": 0.8176100628930818,
      "VeMo (random view)": 0.7988165680473372,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The drunk guy struggles to walk down the street."
  },
  "005039": {
    "text": "a person walks forward but slowly curves towards the left as they are walking and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8309389659780411,
      "Minus Multimodal Distance": -10.832612037658691,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4677185137989e-05,
      "MoBERT-F": 0.3878505365344484,
      "MoBERT-N": 0.5413493936723505,
      "MoBERT-min(F/N)": 0.3878505365344484,
      "MoBERT-max(F/N)": 0.5413493936723505,
      "MotionCritic": -10.15060806274414,
      "VeMo (human-opt view)": 0.9867535287730728,
      "VeMo (max entropy view)": 0.9722849517552864,
      "VeMo (min entropy view)": 0.9867535287730728,
      "VeMo (random view)": 0.9722849517552864,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward but slowly curves to the left as they walk, and then stops."
  },
  "005048": {
    "text": "person walking very slowly towards something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.78842785137406,
      "Minus Multimodal Distance": -11.220845222473145,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0002898213278967887,
      "MoBERT-F": 0.7093744376924709,
      "MoBERT-N": 0.6239476952473682,
      "MoBERT-min(F/N)": 0.6239476952473682,
      "MoBERT-max(F/N)": 0.7093744376924709,
      "MotionCritic": -4.55307674407959,
      "VeMo (human-opt view)": 0.9706653351641493,
      "VeMo (max entropy view)": 0.9706653351641493,
      "VeMo (min entropy view)": 0.9723455104131102,
      "VeMo (random view)": 0.9706653351641493,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking very slowly towards something."
  },
  "005050": {
    "text": "a person climbs up something, turns around and climbs back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4221213054473212,
      "Minus Multimodal Distance": -10.599050521850586,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.48130810260772705,
      "MoBERT-F": 0.7398060650319198,
      "MoBERT-N": 0.7246503408347005,
      "MoBERT-min(F/N)": 0.7246503408347005,
      "MoBERT-max(F/N)": 0.7398060650319198,
      "MotionCritic": -16.34882354736328,
      "VeMo (human-opt view)": 0.002047072881671352,
      "VeMo (max entropy view)": 0.002047072881671352,
      "VeMo (min entropy view)": 0.0001684630326783903,
      "VeMo (random view)": 0.002047072881671352,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person climbs up something, turns around, and climbs back down."
  },
  "005077": {
    "text": "a person sidesteps to their left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.452953764345863,
      "Minus Multimodal Distance": -5.263476848602295,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3424179744324647e-05,
      "MoBERT-F": 0.352510106381398,
      "MoBERT-N": 0.5503248119037407,
      "MoBERT-min(F/N)": 0.352510106381398,
      "MoBERT-max(F/N)": 0.5503248119037407,
      "MotionCritic": -1.0433380603790283,
      "VeMo (human-opt view)": 0.9648641913780215,
      "VeMo (max entropy view)": 0.9283094364788262,
      "VeMo (min entropy view)": 0.9648641913780215,
      "VeMo (random view)": 0.9283094364788262,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sidesteps to their left."
  },
  "005119": {
    "text": "person appears to be holding some thing with both hands and then throws it forward with their right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7565933917285115,
      "Minus Multimodal Distance": -10.008306503295898,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.025631897151470184,
      "MoBERT-F": 0.5285050740313111,
      "MoBERT-N": 0.501523293398967,
      "MoBERT-min(F/N)": 0.501523293398967,
      "MoBERT-max(F/N)": 0.5285050740313111,
      "MotionCritic": -0.33893170952796936,
      "VeMo (human-opt view)": 0.6238095238095238,
      "VeMo (max entropy view)": 0.6238095238095238,
      "VeMo (min entropy view)": 0.13305237808549067,
      "VeMo (random view)": 0.6238095238095238,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person appears to be holding something with both hands and then throws it forward with their right hand."
  },
  "005139": {
    "text": "a man squats and walks forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3563671357189704,
      "Minus Multimodal Distance": -4.925078392028809,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001257713884115219,
      "MoBERT-F": 0.5502885894465059,
      "MoBERT-N": 0.5552952993272202,
      "MoBERT-min(F/N)": 0.5502885894465059,
      "MoBERT-max(F/N)": 0.5552952993272202,
      "MotionCritic": -6.2434611320495605,
      "VeMo (human-opt view)": 0.9740467798782442,
      "VeMo (max entropy view)": 0.8667992047713717,
      "VeMo (min entropy view)": 0.9740467798782442,
      "VeMo (random view)": 0.9740467798782442,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man squats and walks forward."
  },
  "005141": {
    "text": "jumping up in place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2885373175445753,
      "Minus Multimodal Distance": -14.97486686706543,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.03650279343128204,
      "MoBERT-F": 0.7334640274672597,
      "MoBERT-N": 0.7442258441675302,
      "MoBERT-min(F/N)": 0.7334640274672597,
      "MoBERT-max(F/N)": 0.7442258441675302,
      "MotionCritic": -21.306774139404297,
      "VeMo (human-opt view)": 0.09047237790232186,
      "VeMo (max entropy view)": 0.24528301886792453,
      "VeMo (min entropy view)": 0.09047237790232186,
      "VeMo (random view)": 0.24528301886792453,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is jumping up in place."
  },
  "005156": {
    "text": "a person walks forward and kicks their leg out while turning around.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.678591971493914,
      "Minus Multimodal Distance": -6.699631690979004,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.001814796356484294,
      "MoBERT-F": 0.5812106714839315,
      "MoBERT-N": 0.6727836774189042,
      "MoBERT-min(F/N)": 0.5812106714839315,
      "MoBERT-max(F/N)": 0.6727836774189042,
      "MotionCritic": -13.98481559753418,
      "VeMo (human-opt view)": 0.8872638634978671,
      "VeMo (max entropy view)": 0.8517745302713987,
      "VeMo (min entropy view)": 0.8872638634978671,
      "VeMo (random view)": 0.8872638634978671,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, kicks their leg out, and turns around."
  },
  "005180": {
    "text": "a man pats himself on the head",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5133166895385118,
      "Minus Multimodal Distance": -2.3339040279388428,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00037617768975906074,
      "MoBERT-F": 0.5129132775389738,
      "MoBERT-N": 0.6781505880373659,
      "MoBERT-min(F/N)": 0.5129132775389738,
      "MoBERT-max(F/N)": 0.6781505880373659,
      "MotionCritic": -10.41891098022461,
      "VeMo (human-opt view)": 0.6511627906976745,
      "VeMo (max entropy view)": 0.6511627906976745,
      "VeMo (min entropy view)": 0.859538784067086,
      "VeMo (random view)": 0.859538784067086,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man pats himself on the head."
  },
  "005197": {
    "text": "a person lowers their arms to their sides, then stretches them out straight in front of them and parallel to the ground before returning them to their sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.41213264876153066,
      "Minus Multimodal Distance": -9.554906845092773,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4349976229132153e-05,
      "MoBERT-F": 0.39326626777180107,
      "MoBERT-N": 0.5014228167208643,
      "MoBERT-min(F/N)": 0.39326626777180107,
      "MoBERT-max(F/N)": 0.5014228167208643,
      "MotionCritic": -9.110719680786133,
      "VeMo (human-opt view)": 0.7985212569316081,
      "VeMo (max entropy view)": 0.7664233576642335,
      "VeMo (min entropy view)": 0.7985212569316081,
      "VeMo (random view)": 0.7664233576642335,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lowers their arms to their sides, then stretches them out straight in front of themselves and parallel to the ground before returning them to their sides."
  },
  "005229": {
    "text": "a person squats to lift something up then struggles to carry and put it down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9466825355078347,
      "Minus Multimodal Distance": -8.52349853515625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6878920834860764e-05,
      "MoBERT-F": 0.45000459395367637,
      "MoBERT-N": 0.4705654259965305,
      "MoBERT-min(F/N)": 0.45000459395367637,
      "MoBERT-max(F/N)": 0.4705654259965305,
      "MotionCritic": -12.753423690795898,
      "VeMo (human-opt view)": 0.004067197170645446,
      "VeMo (max entropy view)": 0.033011681056373796,
      "VeMo (min entropy view)": 0.004067197170645446,
      "VeMo (random view)": 0.004067197170645446,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats to lift something up, then struggles to carry it and put it down."
  },
  "005266": {
    "text": "a figure walks and spins on their heel.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4162210613649435,
      "Minus Multimodal Distance": -2.9920225143432617,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.924463453586213e-05,
      "MoBERT-F": 0.4629536575396802,
      "MoBERT-N": 0.6457070476234956,
      "MoBERT-min(F/N)": 0.4629536575396802,
      "MoBERT-max(F/N)": 0.6457070476234956,
      "MotionCritic": -5.489254951477051,
      "VeMo (human-opt view)": 0.7058823529411765,
      "VeMo (max entropy view)": 0.7058823529411765,
      "VeMo (min entropy view)": 0.7777777777777778,
      "VeMo (random view)": 0.7058823529411765,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks and spins on their heel."
  },
  "005274": {
    "text": "a person brings their right arm up and bends their elbow so it is across their stomach.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4579378758758826,
      "Minus Multimodal Distance": -8.575862884521484,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2526102839037776e-05,
      "MoBERT-F": 0.33752857485535254,
      "MoBERT-N": 0.4757605834373767,
      "MoBERT-min(F/N)": 0.33752857485535254,
      "MoBERT-max(F/N)": 0.4757605834373767,
      "MotionCritic": -12.499532699584961,
      "VeMo (human-opt view)": 0.9770385999751768,
      "VeMo (max entropy view)": 0.5319148936170213,
      "VeMo (min entropy view)": 0.9770385999751768,
      "VeMo (random view)": 0.9770385999751768,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises their right arm and bends their elbow so that it crosses their stomach."
  },
  "005278": {
    "text": "the man walks while holding onto the rail.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.288761185282239,
      "Minus Multimodal Distance": -9.406145095825195,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.190066490788013e-05,
      "MoBERT-F": 0.3459398002027813,
      "MoBERT-N": 0.5609577580583119,
      "MoBERT-min(F/N)": 0.3459398002027813,
      "MoBERT-max(F/N)": 0.5609577580583119,
      "MotionCritic": -16.23492431640625,
      "VeMo (human-opt view)": 0.0012449149634857618,
      "VeMo (max entropy view)": 0.006295462700198976,
      "VeMo (min entropy view)": 0.0012449149634857618,
      "VeMo (random view)": 0.0012449149634857618,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man walks while holding onto the rail."
  },
  "005296": {
    "text": "person swings arms up and down at the joints while bouncing around, as if dancing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9702053914377052,
      "Minus Multimodal Distance": -3.4862656593322754,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.977854430559091e-05,
      "MoBERT-F": 0.5002632731078961,
      "MoBERT-N": 0.5612024174639632,
      "MoBERT-min(F/N)": 0.5002632731078961,
      "MoBERT-max(F/N)": 0.5612024174639632,
      "MotionCritic": -11.203752517700195,
      "VeMo (human-opt view)": 0.5936599423631124,
      "VeMo (max entropy view)": 0.5936599423631124,
      "VeMo (min entropy view)": 0.7878211227402474,
      "VeMo (random view)": 0.7878211227402474,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person swings their arms up and down at the joints while bouncing around, as if dancing."
  },
  "005300": {
    "text": "a person dribbles a basketball through their legs then runs quickly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3998190770478216,
      "Minus Multimodal Distance": -11.282124519348145,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7663025321089663e-05,
      "MoBERT-F": 0.3210321584389292,
      "MoBERT-N": 0.4186292205203845,
      "MoBERT-min(F/N)": 0.3210321584389292,
      "MoBERT-max(F/N)": 0.4186292205203845,
      "MotionCritic": -10.262003898620605,
      "VeMo (human-opt view)": 2.3519363725405016e-05,
      "VeMo (max entropy view)": 6.812762575224254e-05,
      "VeMo (min entropy view)": 2.3519363725405016e-05,
      "VeMo (random view)": 2.3519363725405016e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person dribbles a basketball through their legs and then runs quickly."
  },
  "005312": {
    "text": "a person is dancing with expressive arm movement and then jumps forward",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6005273830389366,
      "Minus Multimodal Distance": -4.419153690338135,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.541303446283564e-05,
      "MoBERT-F": 0.4836176171627794,
      "MoBERT-N": 0.550875248647662,
      "MoBERT-min(F/N)": 0.4836176171627794,
      "MoBERT-max(F/N)": 0.550875248647662,
      "MotionCritic": -9.042963981628418,
      "VeMo (human-opt view)": 0.6788511749347258,
      "VeMo (max entropy view)": 0.6788511749347258,
      "VeMo (min entropy view)": 0.6790945406125166,
      "VeMo (random view)": 0.6790945406125166,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is dancing with expressive arm movements and then jumps forward."
  },
  "005321": {
    "text": "a person who is standing with his hands at this sides reaches down to his left, picks up something, moves the object to his right and places it down and returns to his standing position with his hands to his sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.34787324044725393,
      "Minus Multimodal Distance": -8.61548137664795,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3903987312223762e-05,
      "MoBERT-F": 0.3490892403862971,
      "MoBERT-N": 0.46638627083347567,
      "MoBERT-min(F/N)": 0.3490892403862971,
      "MoBERT-max(F/N)": 0.46638627083347567,
      "MotionCritic": -2.4445641040802,
      "VeMo (human-opt view)": 0.8181818181818182,
      "VeMo (max entropy view)": 0.5317220543806647,
      "VeMo (min entropy view)": 0.8181818181818182,
      "VeMo (random view)": 0.5317220543806647,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who is standing with his hands at his sides reaches down to his left, picks up something, moves the object to his right, places it down, and returns to his standing position with his hands at his sides."
  },
  "005347": {
    "text": "a person walks on a beam with arms straight up in the air, stops and lowers his arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8775789162182264,
      "Minus Multimodal Distance": -8.617913246154785,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.9286951252724975e-05,
      "MoBERT-F": 0.3977133926283366,
      "MoBERT-N": 0.47254837107186426,
      "MoBERT-min(F/N)": 0.3977133926283366,
      "MoBERT-max(F/N)": 0.47254837107186426,
      "MotionCritic": -9.64016342163086,
      "VeMo (human-opt view)": 5.949075820153898e-06,
      "VeMo (max entropy view)": 5.949075820153898e-06,
      "VeMo (min entropy view)": 4.502573220595571e-06,
      "VeMo (random view)": 4.502573220595571e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks on a beam with their arms straight up in the air, then stops and lowers their arms."
  },
  "005356": {
    "text": "a person repeatedly blocks their face with their left arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9963851438141935,
      "Minus Multimodal Distance": -4.399360179901123,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00015883798187132925,
      "MoBERT-F": 0.532217411107196,
      "MoBERT-N": 0.5573547088757262,
      "MoBERT-min(F/N)": 0.532217411107196,
      "MoBERT-max(F/N)": 0.5573547088757262,
      "MotionCritic": -4.45486307144165,
      "VeMo (human-opt view)": 0.7306122448979592,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.7306122448979592,
      "VeMo (random view)": 0.7306122448979592,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person repeatedly blocks their face with their left arm."
  },
  "005364": {
    "text": "a person walks forward as their right hand guides them",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1860475472769707,
      "Minus Multimodal Distance": -10.624210357666016,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.660817674244754e-05,
      "MoBERT-F": 0.4741893543125115,
      "MoBERT-N": 0.5485518951252315,
      "MoBERT-min(F/N)": 0.4741893543125115,
      "MoBERT-max(F/N)": 0.5485518951252315,
      "MotionCritic": -9.499838829040527,
      "VeMo (human-opt view)": 0.9706884798909339,
      "VeMo (max entropy view)": 0.949685534591195,
      "VeMo (min entropy view)": 0.9706884798909339,
      "VeMo (random view)": 0.9706884798909339,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward as their right hand guides them."
  },
  "005376": {
    "text": "person walks straight in pretty slow phase, turns around and walks in oppsite direction.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8017183688597012,
      "Minus Multimodal Distance": -7.906997203826904,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4922730264952406e-05,
      "MoBERT-F": 0.5512406971967728,
      "MoBERT-N": 0.5536958261004243,
      "MoBERT-min(F/N)": 0.5512406971967728,
      "MoBERT-max(F/N)": 0.5536958261004243,
      "MotionCritic": -13.493819236755371,
      "VeMo (human-opt view)": 0.9770491803278688,
      "VeMo (max entropy view)": 0.964727163099186,
      "VeMo (min entropy view)": 0.9770491803278688,
      "VeMo (random view)": 0.964727163099186,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks straight at a pretty slow pace, turns around, and walks in the opposite direction."
  },
  "005399": {
    "text": "a man stands on the ground and lifts his right hand for a moment above the shoulder and then puts it down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.48636722349379885,
      "Minus Multimodal Distance": -7.760921001434326,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.158289448241703e-05,
      "MoBERT-F": 0.3418443571040479,
      "MoBERT-N": 0.52543775951119,
      "MoBERT-min(F/N)": 0.3418443571040479,
      "MoBERT-max(F/N)": 0.52543775951119,
      "MotionCritic": -16.06627082824707,
      "VeMo (human-opt view)": 0.9972002434570907,
      "VeMo (max entropy view)": 0.9972002434570907,
      "VeMo (min entropy view)": 0.9985906986108315,
      "VeMo (random view)": 0.9985906986108315,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man stands on the ground, lifts his right hand above his shoulder for a moment, and then puts it down."
  },
  "005419": {
    "text": "a person shields themselves with their left arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2213771292475233,
      "Minus Multimodal Distance": -11.020618438720703,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9756933145108633e-05,
      "MoBERT-F": 0.47858589651358163,
      "MoBERT-N": 0.5617217031163377,
      "MoBERT-min(F/N)": 0.47858589651358163,
      "MoBERT-max(F/N)": 0.5617217031163377,
      "MotionCritic": -5.8299407958984375,
      "VeMo (human-opt view)": 0.7443181818181818,
      "VeMo (max entropy view)": 0.6791171477079796,
      "VeMo (min entropy view)": 0.7443181818181818,
      "VeMo (random view)": 0.6791171477079796,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person shields themselves with their left arm."
  },
  "005433": {
    "text": "person sits on floor with legs crossed",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0733284374650898,
      "Minus Multimodal Distance": -6.385742664337158,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.063440090045333e-05,
      "MoBERT-F": 0.358834994681899,
      "MoBERT-N": 0.46279991304880297,
      "MoBERT-min(F/N)": 0.358834994681899,
      "MoBERT-max(F/N)": 0.46279991304880297,
      "MotionCritic": -5.605477809906006,
      "VeMo (human-opt view)": 0.9243316719528772,
      "VeMo (max entropy view)": 0.3346613545816733,
      "VeMo (min entropy view)": 0.9243316719528772,
      "VeMo (random view)": 0.9243316719528772,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits on the floor with their legs crossed."
  },
  "005457": {
    "text": "person stands still with both arms raised at shoulder height",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8037786114306085,
      "Minus Multimodal Distance": -7.554959297180176,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.436649265291635e-05,
      "MoBERT-F": 0.38645481143172206,
      "MoBERT-N": 0.5175701842228705,
      "MoBERT-min(F/N)": 0.38645481143172206,
      "MoBERT-max(F/N)": 0.5175701842228705,
      "MotionCritic": -3.2390072345733643,
      "VeMo (human-opt view)": 0.9465400271370421,
      "VeMo (max entropy view)": 0.9465400271370421,
      "VeMo (min entropy view)": 0.957933868127568,
      "VeMo (random view)": 0.957933868127568,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still with both arms raised to shoulder height."
  },
  "005472": {
    "text": "the person is walking and making a right turn.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9428859831692975,
      "Minus Multimodal Distance": -6.622605323791504,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.958646240993403e-05,
      "MoBERT-F": 0.5318462552693566,
      "MoBERT-N": 0.5775821226169651,
      "MoBERT-min(F/N)": 0.5318462552693566,
      "MoBERT-max(F/N)": 0.5775821226169651,
      "MotionCritic": -3.4793906211853027,
      "VeMo (human-opt view)": 0.98408229561451,
      "VeMo (max entropy view)": 0.98408229561451,
      "VeMo (min entropy view)": 0.9902956039533116,
      "VeMo (random view)": 0.98408229561451,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking and making a right turn."
  },
  "005481": {
    "text": "a figure walks confidently forward then raises their hand as if to access a door nob",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2506495234906834,
      "Minus Multimodal Distance": -8.410533905029297,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2802931198384613e-05,
      "MoBERT-F": 0.4147405811449407,
      "MoBERT-N": 0.5753001310248429,
      "MoBERT-min(F/N)": 0.4147405811449407,
      "MoBERT-max(F/N)": 0.5753001310248429,
      "MotionCritic": -8.346209526062012,
      "VeMo (human-opt view)": 0.7882496940024479,
      "VeMo (max entropy view)": 0.7188703465982028,
      "VeMo (min entropy view)": 0.7882496940024479,
      "VeMo (random view)": 0.7882496940024479,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks confidently forward, then raises their hand as if to reach for a doorknob."
  },
  "005485": {
    "text": "a man is walking forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2445177278432145,
      "Minus Multimodal Distance": -9.312762260437012,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8350092172622681,
      "MoBERT-F": 0.9595350856722076,
      "MoBERT-N": 0.8008132842108966,
      "MoBERT-min(F/N)": 0.8008132842108966,
      "MoBERT-max(F/N)": 0.9595350856722076,
      "MotionCritic": -10.780994415283203,
      "VeMo (human-opt view)": 0.9830735963015436,
      "VeMo (max entropy view)": 0.9739934502022731,
      "VeMo (min entropy view)": 0.9830735963015436,
      "VeMo (random view)": 0.9830735963015436,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is walking forward."
  },
  "005486": {
    "text": "the man throws  both hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1481415916233402,
      "Minus Multimodal Distance": -7.311617374420166,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00010364802437834442,
      "MoBERT-F": 0.513106769238605,
      "MoBERT-N": 0.5201483996360111,
      "MoBERT-min(F/N)": 0.513106769238605,
      "MoBERT-max(F/N)": 0.5201483996360111,
      "MotionCritic": -8.102438926696777,
      "VeMo (human-opt view)": 0.8989784335981839,
      "VeMo (max entropy view)": 0.8989784335981839,
      "VeMo (min entropy view)": 0.915068493150685,
      "VeMo (random view)": 0.915068493150685,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man throws both hands."
  },
  "005537": {
    "text": "a figure walks upstairs without a handrail.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1985429000793335,
      "Minus Multimodal Distance": -11.000763893127441,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6998843168257736e-05,
      "MoBERT-F": 0.320237634188672,
      "MoBERT-N": 0.4535431654528048,
      "MoBERT-min(F/N)": 0.320237634188672,
      "MoBERT-max(F/N)": 0.4535431654528048,
      "MotionCritic": -8.978893280029297,
      "VeMo (human-opt view)": 0.28205128205128205,
      "VeMo (max entropy view)": 0.3073005093378608,
      "VeMo (min entropy view)": 0.28205128205128205,
      "VeMo (random view)": 0.3073005093378608,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks upstairs without a handrail."
  },
  "005561": {
    "text": "the sim appears to be standing an object onto their right side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.470069991590525,
      "Minus Multimodal Distance": -8.053521156311035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4151459001586773e-05,
      "MoBERT-F": 0.38785029481758804,
      "MoBERT-N": 0.526774758310067,
      "MoBERT-min(F/N)": 0.38785029481758804,
      "MoBERT-max(F/N)": 0.526774758310067,
      "MotionCritic": -1.6979271173477173,
      "VeMo (human-opt view)": 0.46835443037974683,
      "VeMo (max entropy view)": 0.46835443037974683,
      "VeMo (min entropy view)": 0.4372623574144487,
      "VeMo (random view)": 0.4372623574144487,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person appears to be standing beside an object on their right side."
  },
  "005583": {
    "text": "a figure elegantly stretches, slowly, with their arms extended and right leg extended",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1261679642215612,
      "Minus Multimodal Distance": -6.2638373374938965,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.246324766019825e-05,
      "MoBERT-F": 0.3934652467571767,
      "MoBERT-N": 0.48635099620722466,
      "MoBERT-min(F/N)": 0.3934652467571767,
      "MoBERT-max(F/N)": 0.48635099620722466,
      "MotionCritic": -9.938591003417969,
      "VeMo (human-opt view)": 0.5460526315789473,
      "VeMo (max entropy view)": 0.5460526315789473,
      "VeMo (min entropy view)": 0.6372980910425844,
      "VeMo (random view)": 0.6372980910425844,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person elegantly stretches, slowly, with their arms extended and right leg extended."
  },
  "005610": {
    "text": "person runs backwards performing an a like pattern",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.8324185298393507,
      "Minus Multimodal Distance": -7.786043167114258,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005190999945625663,
      "MoBERT-F": 0.41516923971828756,
      "MoBERT-N": 0.5307630005707149,
      "MoBERT-min(F/N)": 0.41516923971828756,
      "MoBERT-max(F/N)": 0.5307630005707149,
      "MotionCritic": -19.430599212646484,
      "VeMo (human-opt view)": 0.6224188790560472,
      "VeMo (max entropy view)": 0.6224188790560472,
      "VeMo (min entropy view)": 0.9047013977128335,
      "VeMo (random view)": 0.6224188790560472,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs backwards, performing an \"A\"-like pattern."
  },
  "005628": {
    "text": "someone walks with difficulty on their right side, then tries to run",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7182542350948773,
      "Minus Multimodal Distance": -8.710090637207031,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.6760669900104403e-05,
      "MoBERT-F": 0.3438052989255085,
      "MoBERT-N": 0.44012050835992933,
      "MoBERT-min(F/N)": 0.3438052989255085,
      "MoBERT-max(F/N)": 0.44012050835992933,
      "MotionCritic": -2.7936606407165527,
      "VeMo (human-opt view)": 0.30782029950083195,
      "VeMo (max entropy view)": 0.30782029950083195,
      "VeMo (min entropy view)": 0.02040816326530612,
      "VeMo (random view)": 0.02040816326530612,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone walks with difficulty on their right side and then tries to run."
  },
  "005668": {
    "text": "the person is hand mixing dough to bake something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.735057810927228,
      "Minus Multimodal Distance": -3.945809841156006,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.530794699850958e-05,
      "MoBERT-F": 0.41164024172395064,
      "MoBERT-N": 0.43631443791215313,
      "MoBERT-min(F/N)": 0.41164024172395064,
      "MoBERT-max(F/N)": 0.43631443791215313,
      "MotionCritic": -10.457954406738281,
      "VeMo (human-opt view)": 2.4099549646431897e-06,
      "VeMo (max entropy view)": 2.4872795305387472e-06,
      "VeMo (min entropy view)": 2.4099549646431897e-06,
      "VeMo (random view)": 2.4872795305387472e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is hand - mixing dough to bake something."
  },
  "005672": {
    "text": "the person pulls the book off of the shelf",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4652145622887188,
      "Minus Multimodal Distance": -6.169924736022949,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.674416701775044e-05,
      "MoBERT-F": 0.553201462318011,
      "MoBERT-N": 0.5908604284219682,
      "MoBERT-min(F/N)": 0.553201462318011,
      "MoBERT-max(F/N)": 0.5908604284219682,
      "MotionCritic": -8.680115699768066,
      "VeMo (human-opt view)": 1.2273223177285383e-05,
      "VeMo (max entropy view)": 1.833698042935571e-05,
      "VeMo (min entropy view)": 1.2273223177285383e-05,
      "VeMo (random view)": 1.2273223177285383e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person pulls the book off the shelf."
  },
  "005674": {
    "text": "a person sits down, jiggles his right knee, raises both hands quickly to his shoulders then rubs his stomach with his left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8934176147484656,
      "Minus Multimodal Distance": -6.258814334869385,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.126386582152918e-05,
      "MoBERT-F": 0.44952942217747244,
      "MoBERT-N": 0.47961899197700053,
      "MoBERT-min(F/N)": 0.44952942217747244,
      "MoBERT-max(F/N)": 0.47961899197700053,
      "MotionCritic": 0.5216948390007019,
      "VeMo (human-opt view)": 0.7663551401869159,
      "VeMo (max entropy view)": 0.5617647058823529,
      "VeMo (min entropy view)": 0.7663551401869159,
      "VeMo (random view)": 0.5617647058823529,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down, jiggles his right knee, raises both hands quickly to his shoulders, then rubs his stomach with his left hand."
  },
  "005676": {
    "text": "a man continuing bending forward at the waist with his arms dangling in front of him.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8650393516526108,
      "Minus Multimodal Distance": -6.610343933105469,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2805281332693994e-05,
      "MoBERT-F": 0.3588447692664982,
      "MoBERT-N": 0.4530647237204481,
      "MoBERT-min(F/N)": 0.3588447692664982,
      "MoBERT-max(F/N)": 0.4530647237204481,
      "MotionCritic": -5.3030290603637695,
      "VeMo (human-opt view)": 0.437125748502994,
      "VeMo (max entropy view)": 0.437125748502994,
      "VeMo (min entropy view)": 0.5938375350140056,
      "VeMo (random view)": 0.5938375350140056,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man continues to bend forward at the waist with his arms dangling in front of him."
  },
  "005698": {
    "text": "a person swing with the legs and dance",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.89334986501004,
      "Minus Multimodal Distance": -11.319634437561035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.03759504482150078,
      "MoBERT-F": 0.6213267850300384,
      "MoBERT-N": 0.7172225577112658,
      "MoBERT-min(F/N)": 0.6213267850300384,
      "MoBERT-max(F/N)": 0.7172225577112658,
      "MotionCritic": -11.80385971069336,
      "VeMo (human-opt view)": 0.9784615384615385,
      "VeMo (max entropy view)": 0.9784615384615385,
      "VeMo (min entropy view)": 0.9819853397937632,
      "VeMo (random view)": 0.9819853397937632,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person swings their legs and dances."
  },
  "005712": {
    "text": "sitting down crisscrossed, the right arm chucks forward and chucks forward again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.830830045421271,
      "Minus Multimodal Distance": -10.405077934265137,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6363371944171377e-05,
      "MoBERT-F": 0.3732191298517719,
      "MoBERT-N": 0.3961220221371795,
      "MoBERT-min(F/N)": 0.3732191298517719,
      "MoBERT-max(F/N)": 0.3961220221371795,
      "MotionCritic": -15.99425983428955,
      "VeMo (human-opt view)": 0.014991005396761943,
      "VeMo (max entropy view)": 0.22283205268935236,
      "VeMo (min entropy view)": 0.014991005396761943,
      "VeMo (random view)": 0.22283205268935236,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sitting down crisscrossed, the right arm thrusts forward and thrusts forward again."
  },
  "005729": {
    "text": "a person sways to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.548220619915657,
      "Minus Multimodal Distance": -12.710561752319336,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8526264941319823e-05,
      "MoBERT-F": 0.4639452426087568,
      "MoBERT-N": 0.5780045333203254,
      "MoBERT-min(F/N)": 0.4639452426087568,
      "MoBERT-max(F/N)": 0.5780045333203254,
      "MotionCritic": -5.948098659515381,
      "VeMo (human-opt view)": 0.8871670702179176,
      "VeMo (max entropy view)": 0.8075370121130552,
      "VeMo (min entropy view)": 0.8871670702179176,
      "VeMo (random view)": 0.8075370121130552,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sways to the right."
  },
  "005744": {
    "text": "man stands straight up with his hands out in front of him and creates a patting motion in the air continuously.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.670824649810971,
      "Minus Multimodal Distance": -7.301822185516357,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.14676321472507e-05,
      "MoBERT-F": 0.46523134239701214,
      "MoBERT-N": 0.5490998655813895,
      "MoBERT-min(F/N)": 0.46523134239701214,
      "MoBERT-max(F/N)": 0.5490998655813895,
      "MotionCritic": -11.989493370056152,
      "VeMo (human-opt view)": 0.532,
      "VeMo (max entropy view)": 0.532,
      "VeMo (min entropy view)": 0.7985480943738656,
      "VeMo (random view)": 0.532,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man stands straight up with his hands out in front of him and continuously creates a patting motion in the air."
  },
  "005747": {
    "text": "a man dances rhythmically from side to side and then turns",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8552788845287309,
      "Minus Multimodal Distance": -9.141538619995117,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0007929553976282477,
      "MoBERT-F": 0.6878385204211208,
      "MoBERT-N": 0.7106076361371992,
      "MoBERT-min(F/N)": 0.6878385204211208,
      "MoBERT-max(F/N)": 0.7106076361371992,
      "MotionCritic": -6.411609649658203,
      "VeMo (human-opt view)": 0.8992460589444825,
      "VeMo (max entropy view)": 0.7311827956989247,
      "VeMo (min entropy view)": 0.8992460589444825,
      "VeMo (random view)": 0.7311827956989247,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man dances rhythmically from side to side and then turns."
  },
  "005751": {
    "text": "a person balances on their right foot while kicking in multiple directions with their left foot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1811754624311916,
      "Minus Multimodal Distance": -7.606357097625732,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.435938222333789e-05,
      "MoBERT-F": 0.4393662942439631,
      "MoBERT-N": 0.5260760144707222,
      "MoBERT-min(F/N)": 0.4393662942439631,
      "MoBERT-max(F/N)": 0.5260760144707222,
      "MotionCritic": -6.900158405303955,
      "VeMo (human-opt view)": 0.9152,
      "VeMo (max entropy view)": 0.9152,
      "VeMo (min entropy view)": 0.9553170731707317,
      "VeMo (random view)": 0.9152,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person balances on their right foot while kicking in multiple directions with their left foot."
  },
  "005790": {
    "text": "a person dances with someone.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1144164096534441,
      "Minus Multimodal Distance": -8.412333488464355,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9470522403717041,
      "MoBERT-F": 0.8140596493107897,
      "MoBERT-N": 0.7102349267008512,
      "MoBERT-min(F/N)": 0.7102349267008512,
      "MoBERT-max(F/N)": 0.8140596493107897,
      "MotionCritic": -8.980892181396484,
      "VeMo (human-opt view)": 0.1404561824729892,
      "VeMo (max entropy view)": 0.28192161820480405,
      "VeMo (min entropy view)": 0.1404561824729892,
      "VeMo (random view)": 0.1404561824729892,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person dances with someone."
  },
  "005793": {
    "text": "the person is walking on a treadmill slowly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3987303342794686,
      "Minus Multimodal Distance": -8.840363502502441,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.005727655254304409,
      "MoBERT-F": 0.6972178968894531,
      "MoBERT-N": 0.665878795478771,
      "MoBERT-min(F/N)": 0.665878795478771,
      "MoBERT-max(F/N)": 0.6972178968894531,
      "MotionCritic": -5.483839988708496,
      "VeMo (human-opt view)": 0.6078886310904872,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.6078886310904872,
      "VeMo (random view)": 0.5,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking slowly on a treadmill."
  },
  "005799": {
    "text": "man walks forward while upper body is leaning slightly to the left and steps are unbalanced and slow.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0779569220926164,
      "Minus Multimodal Distance": -10.275349617004395,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.043156951665878296,
      "MoBERT-F": 0.6063413171234383,
      "MoBERT-N": 0.6487785055098707,
      "MoBERT-min(F/N)": 0.6063413171234383,
      "MoBERT-max(F/N)": 0.6487785055098707,
      "MotionCritic": -3.860752820968628,
      "VeMo (human-opt view)": 0.6516516516516516,
      "VeMo (max entropy view)": 0.6516516516516516,
      "VeMo (min entropy view)": 0.6666666666666666,
      "VeMo (random view)": 0.6666666666666666,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man walks forward with his upper body leaning slightly to the left. His steps are unbalanced and slow."
  },
  "005851": {
    "text": "a person, standing, raises his right hand as if to check his watch and then lowers his hand back to his side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.46271899042960485,
      "Minus Multimodal Distance": -11.584388732910156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7449170374893583e-05,
      "MoBERT-F": 0.26547604396736335,
      "MoBERT-N": 0.4235931326800505,
      "MoBERT-min(F/N)": 0.26547604396736335,
      "MoBERT-max(F/N)": 0.4235931326800505,
      "MotionCritic": -4.330827236175537,
      "VeMo (human-opt view)": 0.9150991357397051,
      "VeMo (max entropy view)": 0.9150991357397051,
      "VeMo (min entropy view)": 0.9831241886629165,
      "VeMo (random view)": 0.9150991357397051,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person, while standing, raises his right hand as if to check his watch and then lowers his hand back to his side."
  },
  "005869": {
    "text": "a person is walking around the room.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.1059475627848157,
      "Minus Multimodal Distance": -9.143686294555664,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.33745354413986206,
      "MoBERT-F": 0.7056941994707048,
      "MoBERT-N": 0.5689076893242078,
      "MoBERT-min(F/N)": 0.5689076893242078,
      "MoBERT-max(F/N)": 0.7056941994707048,
      "MotionCritic": -13.493819236755371,
      "VeMo (human-opt view)": 0.9890085972358255,
      "VeMo (max entropy view)": 0.9890085972358255,
      "VeMo (min entropy view)": 0.9902601405498705,
      "VeMo (random view)": 0.9890085972358255,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking around the room."
  },
  "005891": {
    "text": "the man picks up a brushes nd brushes someone's hair then puts the brush back down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2761042024250457,
      "Minus Multimodal Distance": -6.638826370239258,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.167403701809235e-05,
      "MoBERT-F": 0.42273158868619487,
      "MoBERT-N": 0.5182635464199065,
      "MoBERT-min(F/N)": 0.42273158868619487,
      "MoBERT-max(F/N)": 0.5182635464199065,
      "MotionCritic": -8.48251724243164,
      "VeMo (human-opt view)": 7.886502936007887e-06,
      "VeMo (max entropy view)": 7.886502936007887e-06,
      "VeMo (min entropy view)": 3.7263802022188208e-06,
      "VeMo (random view)": 3.7263802022188208e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man picks up a brush and brushes someone's hair, then puts the brush back down."
  },
  "005904": {
    "text": "a person does four jumping jacks then two forward leg jumps.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4108216946104142,
      "Minus Multimodal Distance": -9.74728775024414,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005674370913766325,
      "MoBERT-F": 0.5806672815232712,
      "MoBERT-N": 0.696840250111362,
      "MoBERT-min(F/N)": 0.5806672815232712,
      "MoBERT-max(F/N)": 0.696840250111362,
      "MotionCritic": -1.2973618507385254,
      "VeMo (human-opt view)": 0.7435294117647059,
      "VeMo (max entropy view)": 0.6782178217821783,
      "VeMo (min entropy view)": 0.7435294117647059,
      "VeMo (random view)": 0.6782178217821783,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does four jumping jacks, then two forward leg jumps."
  },
  "005933": {
    "text": "the person stands still in a slight squat and then turns to their left and walks.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7671588180095963,
      "Minus Multimodal Distance": -10.545588493347168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.205655957572162e-05,
      "MoBERT-F": 0.29749149354096593,
      "MoBERT-N": 0.34236328121379817,
      "MoBERT-min(F/N)": 0.29749149354096593,
      "MoBERT-max(F/N)": 0.34236328121379817,
      "MotionCritic": -10.820842742919922,
      "VeMo (human-opt view)": 0.9649063032367973,
      "VeMo (max entropy view)": 0.9524762381190596,
      "VeMo (min entropy view)": 0.9649063032367973,
      "VeMo (random view)": 0.9649063032367973,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person stands still in a slight squat, then turns to their left and walks."
  },
  "005935": {
    "text": "place items in a line up",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2761909676799401,
      "Minus Multimodal Distance": -10.07052230834961,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8185302653582767e-05,
      "MoBERT-F": 0.4270625416548828,
      "MoBERT-N": 0.5380094732657383,
      "MoBERT-min(F/N)": 0.4270625416548828,
      "MoBERT-max(F/N)": 0.5380094732657383,
      "MotionCritic": -4.62264347076416,
      "VeMo (human-opt view)": 4.135181973437095e-05,
      "VeMo (max entropy view)": 4.135181973437095e-05,
      "VeMo (min entropy view)": 2.2137261275589016e-05,
      "VeMo (random view)": 4.135181973437095e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person places items in a line up."
  },
  "005937": {
    "text": "a person walks diagonally forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4425296667320666,
      "Minus Multimodal Distance": -12.058895111083984,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9427533745765686,
      "MoBERT-F": 0.914355893813342,
      "MoBERT-N": 0.8484576861582506,
      "MoBERT-min(F/N)": 0.8484576861582506,
      "MoBERT-max(F/N)": 0.914355893813342,
      "MotionCritic": -6.35145902633667,
      "VeMo (human-opt view)": 0.9045362220717671,
      "VeMo (max entropy view)": 0.9045362220717671,
      "VeMo (min entropy view)": 0.9363867684478372,
      "VeMo (random view)": 0.9363867684478372,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks diagonally forward."
  },
  "005951": {
    "text": "a person stands relaxed seems to observe something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5296378970506799,
      "Minus Multimodal Distance": -10.840995788574219,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.478516009636223e-05,
      "MoBERT-F": 0.47159309044495695,
      "MoBERT-N": 0.49566111360430054,
      "MoBERT-min(F/N)": 0.47159309044495695,
      "MoBERT-max(F/N)": 0.49566111360430054,
      "MotionCritic": -14.888176918029785,
      "VeMo (human-opt view)": 0.3487394957983193,
      "VeMo (max entropy view)": 0.3487394957983193,
      "VeMo (min entropy view)": 0.23439099283520984,
      "VeMo (random view)": 0.23439099283520984,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands relaxed and seems to be observing something."
  },
  "005955": {
    "text": "a person puts their hands together, leans forwards slightly then swings the arms from right to left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2568688451718892,
      "Minus Multimodal Distance": -10.817049980163574,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00024397191009484231,
      "MoBERT-F": 0.47797742174023206,
      "MoBERT-N": 0.462869567849608,
      "MoBERT-min(F/N)": 0.462869567849608,
      "MoBERT-max(F/N)": 0.47797742174023206,
      "MotionCritic": -11.329338073730469,
      "VeMo (human-opt view)": 0.7556029882604055,
      "VeMo (max entropy view)": 0.6650717703349283,
      "VeMo (min entropy view)": 0.7556029882604055,
      "VeMo (random view)": 0.7556029882604055,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person puts their hands together, leans forward slightly, then swings the arms from right to left."
  },
  "005981": {
    "text": "the person is doing arm exercises like a windmill.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9832807369014316,
      "Minus Multimodal Distance": -10.334975242614746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.979391872882843,
      "MoBERT-F": 0.6982335796610397,
      "MoBERT-N": 0.5442265724028704,
      "MoBERT-min(F/N)": 0.5442265724028704,
      "MoBERT-max(F/N)": 0.6982335796610397,
      "MotionCritic": -4.142744541168213,
      "VeMo (human-opt view)": 0.9436298468556533,
      "VeMo (max entropy view)": 0.9436298468556533,
      "VeMo (min entropy view)": 0.979678015307469,
      "VeMo (random view)": 0.9436298468556533,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is doing arm exercises like a windmill."
  },
  "006006": {
    "text": "a figure puts two objects together from opposite positions.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5309869260279755,
      "Minus Multimodal Distance": -9.386711120605469,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.000708168838173151,
      "MoBERT-F": 0.6338422069446168,
      "MoBERT-N": 0.6027855508401286,
      "MoBERT-min(F/N)": 0.6027855508401286,
      "MoBERT-max(F/N)": 0.6338422069446168,
      "MotionCritic": -10.054884910583496,
      "VeMo (human-opt view)": 5.161910927279606e-05,
      "VeMo (max entropy view)": 5.161910927279606e-05,
      "VeMo (min entropy view)": 4.1491476713778674e-05,
      "VeMo (random view)": 4.1491476713778674e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person puts two objects together from opposite positions."
  },
  "006022": {
    "text": "someone runs backwards in a counterclockwise motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.083890159388635,
      "Minus Multimodal Distance": -3.8925840854644775,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.010085020214319229,
      "MoBERT-F": 0.6159309518112213,
      "MoBERT-N": 0.5932066835146182,
      "MoBERT-min(F/N)": 0.5932066835146182,
      "MoBERT-max(F/N)": 0.6159309518112213,
      "MotionCritic": -5.502712726593018,
      "VeMo (human-opt view)": 0.36293436293436293,
      "VeMo (max entropy view)": 0.36293436293436293,
      "VeMo (min entropy view)": 0.7311827956989247,
      "VeMo (random view)": 0.7311827956989247,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone runs backward in a counter - clockwise motion."
  },
  "006053": {
    "text": "waving arms around randomly quickly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1602688290361842,
      "Minus Multimodal Distance": -7.087940216064453,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5858280423562974e-05,
      "MoBERT-F": 0.5357401883813633,
      "MoBERT-N": 0.5802375499674248,
      "MoBERT-min(F/N)": 0.5357401883813633,
      "MoBERT-max(F/N)": 0.5802375499674248,
      "MotionCritic": -10.256117820739746,
      "VeMo (human-opt view)": 0.9102730819245773,
      "VeMo (max entropy view)": 0.9102730819245773,
      "VeMo (min entropy view)": 0.9364651520985753,
      "VeMo (random view)": 0.9102730819245773,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is waving arms around randomly quickly."
  },
  "006058": {
    "text": "the sim walks down the plane in a wobbly fashion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9265851733662843,
      "Minus Multimodal Distance": -10.585841178894043,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5449166059843265e-05,
      "MoBERT-F": 0.4221216576557307,
      "MoBERT-N": 0.4997791002066427,
      "MoBERT-min(F/N)": 0.4221216576557307,
      "MoBERT-max(F/N)": 0.4997791002066427,
      "MotionCritic": -15.21349048614502,
      "VeMo (human-opt view)": 0.8441926345609065,
      "VeMo (max entropy view)": 0.8441926345609065,
      "VeMo (min entropy view)": 0.8514412416851441,
      "VeMo (random view)": 0.8514412416851441,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person walks down the plane in a wobbly fashion."
  },
  "006068": {
    "text": "a walking person suddenly gets staggered to their left, then recovers.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.078865374072421,
      "Minus Multimodal Distance": -9.88778305053711,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0753788873553276,
      "MoBERT-F": 0.6519486285201308,
      "MoBERT-N": 0.5704907854662153,
      "MoBERT-min(F/N)": 0.5704907854662153,
      "MoBERT-max(F/N)": 0.6519486285201308,
      "MotionCritic": -12.336602210998535,
      "VeMo (human-opt view)": 0.9797703663203936,
      "VeMo (max entropy view)": 0.9739383998542008,
      "VeMo (min entropy view)": 0.9797703663203936,
      "VeMo (random view)": 0.9797703663203936,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A walking person suddenly staggers to their left, then recovers."
  },
  "006089": {
    "text": "a person stands still and then bounces their hand as if playing with a yo-yo",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8214994493753058,
      "Minus Multimodal Distance": -5.730180263519287,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.2948908483376727e-05,
      "MoBERT-F": 0.3212466223682381,
      "MoBERT-N": 0.4504941406646414,
      "MoBERT-min(F/N)": 0.3212466223682381,
      "MoBERT-max(F/N)": 0.4504941406646414,
      "MotionCritic": -5.565532207489014,
      "VeMo (human-opt view)": 0.010309278350515464,
      "VeMo (max entropy view)": 0.010309278350515464,
      "VeMo (min entropy view)": 0.004906364453044019,
      "VeMo (random view)": 0.004906364453044019,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands still and then bounces their hand as if playing with a yo - yo."
  },
  "006095": {
    "text": "person seems to be exercising by bending at the knees and walking forward",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3880448155502536,
      "Minus Multimodal Distance": -8.173900604248047,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.19367131346371e-05,
      "MoBERT-F": 0.33432830226459265,
      "MoBERT-N": 0.35672249512845994,
      "MoBERT-min(F/N)": 0.33432830226459265,
      "MoBERT-max(F/N)": 0.35672249512845994,
      "MotionCritic": -0.9063634276390076,
      "VeMo (human-opt view)": 0.008083632164346927,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.008083632164346927,
      "VeMo (random view)": 0.008083632164346927,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person seems to be exercising by bending at the knees and walking forward."
  },
  "006106": {
    "text": "a person stands still for a moment, and then staggers forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5077201703163882,
      "Minus Multimodal Distance": -10.551545143127441,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.536670556059107e-05,
      "MoBERT-F": 0.41636098940756583,
      "MoBERT-N": 0.48735779244949184,
      "MoBERT-min(F/N)": 0.41636098940756583,
      "MoBERT-max(F/N)": 0.48735779244949184,
      "MotionCritic": -2.1759846210479736,
      "VeMo (human-opt view)": 0.9820187005514265,
      "VeMo (max entropy view)": 0.9098712446351931,
      "VeMo (min entropy view)": 0.9820187005514265,
      "VeMo (random view)": 0.9098712446351931,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still for a moment and then staggers forward."
  },
  "006117": {
    "text": "someone being pushed back a few steps",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6536437620532063,
      "Minus Multimodal Distance": -12.390917778015137,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.13585323095321655,
      "MoBERT-F": 0.5281573266480893,
      "MoBERT-N": 0.4335900823243759,
      "MoBERT-min(F/N)": 0.4335900823243759,
      "MoBERT-max(F/N)": 0.5281573266480893,
      "MotionCritic": -1.705243468284607,
      "VeMo (human-opt view)": 0.5310344827586206,
      "VeMo (max entropy view)": 0.5310344827586206,
      "VeMo (min entropy view)": 0.4541577825159915,
      "VeMo (random view)": 0.5310344827586206,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is pushed back a few steps."
  },
  "006118": {
    "text": "standing on one foot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.859500540644234,
      "Minus Multimodal Distance": -11.32466983795166,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4012058929656632e-05,
      "MoBERT-F": 0.4365039166946473,
      "MoBERT-N": 0.5040023917189874,
      "MoBERT-min(F/N)": 0.4365039166946473,
      "MoBERT-max(F/N)": 0.5040023917189874,
      "MotionCritic": -10.031904220581055,
      "VeMo (human-opt view)": 0.8931572629051621,
      "VeMo (max entropy view)": 0.6796875,
      "VeMo (min entropy view)": 0.8931572629051621,
      "VeMo (random view)": 0.6796875,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing on one foot."
  },
  "006123": {
    "text": "a person uses their right hands to move their right foot to rest against the last leg in a yoga pose, then repeats the same motion with the left leg.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7532753577219521,
      "Minus Multimodal Distance": -11.966652870178223,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3499043891206384e-05,
      "MoBERT-F": 0.46359376402913255,
      "MoBERT-N": 0.5620157187714248,
      "MoBERT-min(F/N)": 0.46359376402913255,
      "MoBERT-max(F/N)": 0.5620157187714248,
      "MotionCritic": -5.324859619140625,
      "VeMo (human-opt view)": 0.29411764705882354,
      "VeMo (max entropy view)": 0.37786259541984735,
      "VeMo (min entropy view)": 0.29411764705882354,
      "VeMo (random view)": 0.29411764705882354,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person uses their right hand to move their right foot to rest against the last leg in a yoga pose, then repeats the same motion with the left leg."
  },
  "006132": {
    "text": "the man holds something above his left shoulder and rubs it with his right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7234983864495326,
      "Minus Multimodal Distance": -4.128361701965332,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.0409795474261045e-05,
      "MoBERT-F": 0.39068539228327753,
      "MoBERT-N": 0.42789573034220896,
      "MoBERT-min(F/N)": 0.39068539228327753,
      "MoBERT-max(F/N)": 0.42789573034220896,
      "MotionCritic": -5.715061664581299,
      "VeMo (human-opt view)": 0.027457556067910293,
      "VeMo (max entropy view)": 0.1329479768786127,
      "VeMo (min entropy view)": 0.027457556067910293,
      "VeMo (random view)": 0.1329479768786127,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man holds something above his left shoulder and rubs it with his right hand."
  },
  "006137": {
    "text": "a person propels himself and takes a long jump",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3165537745118951,
      "Minus Multimodal Distance": -11.765948295593262,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2513411749969237e-05,
      "MoBERT-F": 0.42760670767812026,
      "MoBERT-N": 0.5143806447209525,
      "MoBERT-min(F/N)": 0.42760670767812026,
      "MoBERT-max(F/N)": 0.5143806447209525,
      "MotionCritic": -12.587059020996094,
      "VeMo (human-opt view)": 0.00014453605532197816,
      "VeMo (max entropy view)": 0.0013218557565206177,
      "VeMo (min entropy view)": 0.00014453605532197816,
      "VeMo (random view)": 0.00014453605532197816,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person propels himself and takes a long jump."
  },
  "006163": {
    "text": "man moves his right hand to cover his groin.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4367005894499684,
      "Minus Multimodal Distance": -10.75288200378418,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.649584894243162e-05,
      "MoBERT-F": 0.32947199006764893,
      "MoBERT-N": 0.5231559359593463,
      "MoBERT-min(F/N)": 0.32947199006764893,
      "MoBERT-max(F/N)": 0.5231559359593463,
      "MotionCritic": -12.042759895324707,
      "VeMo (human-opt view)": 0.7302158273381295,
      "VeMo (max entropy view)": 0.7302158273381295,
      "VeMo (min entropy view)": 0.7543859649122807,
      "VeMo (random view)": 0.7543859649122807,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man moves his right hand to cover his groin."
  },
  "006172": {
    "text": "a person holds their hands together and bows their head a few times.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6747172502034604,
      "Minus Multimodal Distance": -7.250647068023682,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 9.81669727480039e-05,
      "MoBERT-F": 0.5418951576106515,
      "MoBERT-N": 0.5342298169282143,
      "MoBERT-min(F/N)": 0.5342298169282143,
      "MoBERT-max(F/N)": 0.5418951576106515,
      "MotionCritic": -10.721562385559082,
      "VeMo (human-opt view)": 0.1482889733840304,
      "VeMo (max entropy view)": 0.1482889733840304,
      "VeMo (min entropy view)": 0.0395558639833449,
      "VeMo (random view)": 0.1482889733840304,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds their hands together and bows their head a few times."
  },
  "006177": {
    "text": "waving hands in and out.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4761023670177774,
      "Minus Multimodal Distance": -11.697214126586914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.629447408253327e-05,
      "MoBERT-F": 0.41488372275710517,
      "MoBERT-N": 0.4591828650440764,
      "MoBERT-min(F/N)": 0.41488372275710517,
      "MoBERT-max(F/N)": 0.4591828650440764,
      "MotionCritic": -5.426353931427002,
      "VeMo (human-opt view)": 0.7540983606557377,
      "VeMo (max entropy view)": 0.5933014354066986,
      "VeMo (min entropy view)": 0.7540983606557377,
      "VeMo (random view)": 0.5933014354066986,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is waving hands in and out."
  },
  "006185": {
    "text": "the person is running around in a circle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6730848699849066,
      "Minus Multimodal Distance": -6.400814533233643,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3080567189026624e-05,
      "MoBERT-F": 0.418806014801711,
      "MoBERT-N": 0.41407850058809653,
      "MoBERT-min(F/N)": 0.41407850058809653,
      "MoBERT-max(F/N)": 0.418806014801711,
      "MotionCritic": -10.10981559753418,
      "VeMo (human-opt view)": 0.09523809523809523,
      "VeMo (max entropy view)": 0.3485064011379801,
      "VeMo (min entropy view)": 0.09523809523809523,
      "VeMo (random view)": 0.3485064011379801,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is running around in a circle."
  },
  "006186": {
    "text": "person makes washing motions with both hands on opposite upper arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1635298510517986,
      "Minus Multimodal Distance": -11.641474723815918,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00039252309943549335,
      "MoBERT-F": 0.5102210450707548,
      "MoBERT-N": 0.5020954834956013,
      "MoBERT-min(F/N)": 0.5020954834956013,
      "MoBERT-max(F/N)": 0.5102210450707548,
      "MotionCritic": -6.587223052978516,
      "VeMo (human-opt view)": 0.01683045295380097,
      "VeMo (max entropy view)": 0.017997843576345693,
      "VeMo (min entropy view)": 0.01683045295380097,
      "VeMo (random view)": 0.01683045295380097,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person makes washing motions with both hands on the opposite upper arms."
  },
  "006199": {
    "text": "a person picks something up on the left and sets it down on the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.370822350467208,
      "Minus Multimodal Distance": -2.654940366744995,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4960610971902497e-05,
      "MoBERT-F": 0.3899263341596848,
      "MoBERT-N": 0.5283123050919801,
      "MoBERT-min(F/N)": 0.3899263341596848,
      "MoBERT-max(F/N)": 0.5283123050919801,
      "MotionCritic": -1.3070744276046753,
      "VeMo (human-opt view)": 0.7180451127819549,
      "VeMo (max entropy view)": 0.7180451127819549,
      "VeMo (min entropy view)": 0.7306397306397306,
      "VeMo (random view)": 0.7306397306397306,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks something up on the left and sets it down on the right."
  },
  "006204": {
    "text": "a person walks forward with left foot, then trips and continues walking forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.1946754413307388,
      "Minus Multimodal Distance": -4.237866401672363,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.6219696402549744,
      "MoBERT-F": 0.7404775491448327,
      "MoBERT-N": 0.6944311902028839,
      "MoBERT-min(F/N)": 0.6944311902028839,
      "MoBERT-max(F/N)": 0.7404775491448327,
      "MotionCritic": -9.376195907592773,
      "VeMo (human-opt view)": 0.8169868554095046,
      "VeMo (max entropy view)": 0.8169868554095046,
      "VeMo (min entropy view)": 0.8181818181818182,
      "VeMo (random view)": 0.8169868554095046,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward with the left foot, then trips and continues walking forward."
  },
  "006212": {
    "text": "person walks forward in a semi-straight line with arms extended out to the sides.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1912476516847326,
      "Minus Multimodal Distance": -7.089972972869873,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.752675522468053e-05,
      "MoBERT-F": 0.5771989287556438,
      "MoBERT-N": 0.510881100022804,
      "MoBERT-min(F/N)": 0.510881100022804,
      "MoBERT-max(F/N)": 0.5771989287556438,
      "MotionCritic": -4.40533447265625,
      "VeMo (human-opt view)": 0.39195979899497485,
      "VeMo (max entropy view)": 0.5469061876247505,
      "VeMo (min entropy view)": 0.39195979899497485,
      "VeMo (random view)": 0.5469061876247505,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward in a semi - straight line with their arms extended out to the sides."
  },
  "006213": {
    "text": "a person winds up his arm and then pitches a ball.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3817475126719,
      "Minus Multimodal Distance": -4.521970272064209,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9912405610084534,
      "MoBERT-F": 0.6651054265277134,
      "MoBERT-N": 0.5033779655226025,
      "MoBERT-min(F/N)": 0.5033779655226025,
      "MoBERT-max(F/N)": 0.6651054265277134,
      "MotionCritic": -8.964607238769531,
      "VeMo (human-opt view)": 0.6781857451403888,
      "VeMo (max entropy view)": 0.6781857451403888,
      "VeMo (min entropy view)": 0.844,
      "VeMo (random view)": 0.844,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person winds up his arm and then pitches a ball."
  },
  "006214": {
    "text": "a person stands up from a sitting position, makes an overhand throw motion, and underhand throw motion, then a basketball shot motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4171164982612565,
      "Minus Multimodal Distance": -7.275778770446777,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3384740416076966e-05,
      "MoBERT-F": 0.41726574003572453,
      "MoBERT-N": 0.4641894280992481,
      "MoBERT-min(F/N)": 0.41726574003572453,
      "MoBERT-max(F/N)": 0.4641894280992481,
      "MotionCritic": -17.31173324584961,
      "VeMo (human-opt view)": 0.8597899938233478,
      "VeMo (max entropy view)": 0.8517308948399739,
      "VeMo (min entropy view)": 0.8597899938233478,
      "VeMo (random view)": 0.8597899938233478,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands up from a sitting position, makes an overhand throwing motion, an underhand throwing motion, and then a basketball - shooting motion."
  },
  "006215": {
    "text": "a figure seems to gesture contritely or sincerely with their left hand as though entreating someone",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.314856039067808,
      "Minus Multimodal Distance": -10.536377906799316,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.118429958703928e-05,
      "MoBERT-F": 0.4319635026138813,
      "MoBERT-N": 0.46428185742027245,
      "MoBERT-min(F/N)": 0.4319635026138813,
      "MoBERT-max(F/N)": 0.46428185742027245,
      "MotionCritic": -6.726835250854492,
      "VeMo (human-opt view)": 0.3068432671081678,
      "VeMo (max entropy view)": 0.3346774193548387,
      "VeMo (min entropy view)": 0.3068432671081678,
      "VeMo (random view)": 0.3068432671081678,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person seems to gesture contritely or sincerely with their left hand as though entreating someone."
  },
  "006235": {
    "text": "the man does a wavelike dance using his arms and slightly sways.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8963855554480316,
      "Minus Multimodal Distance": -12.050459861755371,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.281218520598486e-05,
      "MoBERT-F": 0.32696615707284404,
      "MoBERT-N": 0.4637021416227676,
      "MoBERT-min(F/N)": 0.32696615707284404,
      "MoBERT-max(F/N)": 0.4637021416227676,
      "MotionCritic": -3.0449728965759277,
      "VeMo (human-opt view)": 0.8597748208802457,
      "VeMo (max entropy view)": 0.8597748208802457,
      "VeMo (min entropy view)": 0.8990694345025053,
      "VeMo (random view)": 0.8990694345025053,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man performs a wavelike dance, using his arms and swaying slightly."
  },
  "006241": {
    "text": "the person is jogging with dumbbells.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9205282661702607,
      "Minus Multimodal Distance": -2.771423578262329,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.763135100598447e-05,
      "MoBERT-F": 0.41365564370526386,
      "MoBERT-N": 0.5446408339957273,
      "MoBERT-min(F/N)": 0.41365564370526386,
      "MoBERT-max(F/N)": 0.5446408339957273,
      "MotionCritic": -1.6741664409637451,
      "VeMo (human-opt view)": 1.9353941316549895e-06,
      "VeMo (max entropy view)": 1.9353941316549895e-06,
      "VeMo (min entropy view)": 1.8815701109752628e-06,
      "VeMo (random view)": 1.8815701109752628e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is jogging with dumbbells."
  },
  "006245": {
    "text": "a person holds their arms near their face and searches right and left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6838582967420607,
      "Minus Multimodal Distance": -12.326143264770508,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3497474103351124e-05,
      "MoBERT-F": 0.4724215782978906,
      "MoBERT-N": 0.5643601763790088,
      "MoBERT-min(F/N)": 0.4724215782978906,
      "MoBERT-max(F/N)": 0.5643601763790088,
      "MotionCritic": -7.322259426116943,
      "VeMo (human-opt view)": 0.8667496886674969,
      "VeMo (max entropy view)": 0.8667496886674969,
      "VeMo (min entropy view)": 0.8990694345025053,
      "VeMo (random view)": 0.8990694345025053,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds their arms near their face and looks right and left."
  },
  "006246": {
    "text": "a person takes a step forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5318411101003678,
      "Minus Multimodal Distance": -11.160165786743164,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2157384591992013e-05,
      "MoBERT-F": 0.42487464782574363,
      "MoBERT-N": 0.49499261795834226,
      "MoBERT-min(F/N)": 0.42487464782574363,
      "MoBERT-max(F/N)": 0.49499261795834226,
      "MotionCritic": -15.702624320983887,
      "VeMo (human-opt view)": 0.9808429118773946,
      "VeMo (max entropy view)": 0.9769530080814127,
      "VeMo (min entropy view)": 0.9808429118773946,
      "VeMo (random view)": 0.9808429118773946,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes a step forward."
  },
  "006249": {
    "text": "the person goes for a short jog",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1397058300592369,
      "Minus Multimodal Distance": -9.25888442993164,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.088670928264037e-05,
      "MoBERT-F": 0.5501458350108079,
      "MoBERT-N": 0.6873379519742444,
      "MoBERT-min(F/N)": 0.5501458350108079,
      "MoBERT-max(F/N)": 0.6873379519742444,
      "MotionCritic": -9.899210929870605,
      "VeMo (human-opt view)": 0.9924213320294765,
      "VeMo (max entropy view)": 0.9578837452140619,
      "VeMo (min entropy view)": 0.9924213320294765,
      "VeMo (random view)": 0.9924213320294765,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person goes for a short jog."
  },
  "006251": {
    "text": "the person is walking normally.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3875100663062818,
      "Minus Multimodal Distance": -11.272591590881348,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.000728760554920882,
      "MoBERT-F": 0.6951845318049771,
      "MoBERT-N": 0.7280548799731548,
      "MoBERT-min(F/N)": 0.6951845318049771,
      "MoBERT-max(F/N)": 0.7280548799731548,
      "MotionCritic": -15.28615951538086,
      "VeMo (human-opt view)": 0.9867535287730728,
      "VeMo (max entropy view)": 0.978296382730455,
      "VeMo (min entropy view)": 0.9867535287730728,
      "VeMo (random view)": 0.9867535287730728,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking normally."
  },
  "006258": {
    "text": "the man moves to the side",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9369167532750758,
      "Minus Multimodal Distance": -6.56082820892334,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00020266535284463316,
      "MoBERT-F": 0.5250773395140271,
      "MoBERT-N": 0.6002378267410976,
      "MoBERT-min(F/N)": 0.5250773395140271,
      "MoBERT-max(F/N)": 0.6002378267410976,
      "MotionCritic": -7.877522945404053,
      "VeMo (human-opt view)": 0.9604365620736699,
      "VeMo (max entropy view)": 0.8264462809917356,
      "VeMo (min entropy view)": 0.9604365620736699,
      "VeMo (random view)": 0.9604365620736699,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man moves to the side."
  },
  "006263": {
    "text": "walking backwards and then sitting.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9918579778933183,
      "Minus Multimodal Distance": -10.480305671691895,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.13334959745407104,
      "MoBERT-F": 0.5215325456249807,
      "MoBERT-N": 0.5429643523821439,
      "MoBERT-min(F/N)": 0.5215325456249807,
      "MoBERT-max(F/N)": 0.5429643523821439,
      "MotionCritic": -4.867091655731201,
      "VeMo (human-opt view)": 0.8517745302713987,
      "VeMo (max entropy view)": 0.776495278069255,
      "VeMo (min entropy view)": 0.8517745302713987,
      "VeMo (random view)": 0.776495278069255,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking backwards and then sitting."
  },
  "006281": {
    "text": "a person rolls his right ankle while walking",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6104683013036718,
      "Minus Multimodal Distance": -7.104364395141602,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.3473887015134096e-05,
      "MoBERT-F": 0.3321184743679245,
      "MoBERT-N": 0.4088394452401825,
      "MoBERT-min(F/N)": 0.3321184743679245,
      "MoBERT-max(F/N)": 0.4088394452401825,
      "MotionCritic": -19.47418785095215,
      "VeMo (human-opt view)": 0.008565700747220703,
      "VeMo (max entropy view)": 0.008565700747220703,
      "VeMo (min entropy view)": 0.002474331248660549,
      "VeMo (random view)": 0.002474331248660549,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person rolls their right ankle while walking."
  },
  "006304": {
    "text": "a person is crouched down and walking around sneakily.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.3494832380162944,
      "Minus Multimodal Distance": -7.570670127868652,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5949560949811712e-05,
      "MoBERT-F": 0.3764775671534583,
      "MoBERT-N": 0.3636834843749217,
      "MoBERT-min(F/N)": 0.3636834843749217,
      "MoBERT-max(F/N)": 0.3764775671534583,
      "MotionCritic": -8.067018508911133,
      "VeMo (human-opt view)": 0.6076696165191741,
      "VeMo (max entropy view)": 0.5769230769230769,
      "VeMo (min entropy view)": 0.6076696165191741,
      "VeMo (random view)": 0.6076696165191741,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is crouched down and walking around sneakily."
  },
  "006306": {
    "text": "a person pushes their right arm forward and then uses both hands together while bending over as if catching something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8479765647972932,
      "Minus Multimodal Distance": -12.795025825500488,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0019377575954422355,
      "MoBERT-F": 0.48202216505649864,
      "MoBERT-N": 0.38364352235267885,
      "MoBERT-min(F/N)": 0.38364352235267885,
      "MoBERT-max(F/N)": 0.48202216505649864,
      "MotionCritic": -8.50556755065918,
      "VeMo (human-opt view)": 0.2938209331651955,
      "VeMo (max entropy view)": 0.4530612244897959,
      "VeMo (min entropy view)": 0.2938209331651955,
      "VeMo (random view)": 0.4530612244897959,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person extends their right arm forward, then, while bending over, uses both hands simultaneously as if catching something."
  },
  "006314": {
    "text": "this person walks slowly forward in a zig zag.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9641572452101245,
      "Minus Multimodal Distance": -9.210159301757812,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7038317057304084e-05,
      "MoBERT-F": 0.40348182838098157,
      "MoBERT-N": 0.5489017391853905,
      "MoBERT-min(F/N)": 0.40348182838098157,
      "MoBERT-max(F/N)": 0.5489017391853905,
      "MotionCritic": -12.14537525177002,
      "VeMo (human-opt view)": 0.6519607843137255,
      "VeMo (max entropy view)": 0.6519607843137255,
      "VeMo (min entropy view)": 0.6651270207852193,
      "VeMo (random view)": 0.6519607843137255,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person walks slowly forward in a zig - zag."
  },
  "006331": {
    "text": "a person is stumbling forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1329686471458482,
      "Minus Multimodal Distance": -7.5095720291137695,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.720509291975759e-05,
      "MoBERT-F": 0.4200228393788566,
      "MoBERT-N": 0.467665156332757,
      "MoBERT-min(F/N)": 0.4200228393788566,
      "MoBERT-max(F/N)": 0.467665156332757,
      "MotionCritic": -3.1898608207702637,
      "VeMo (human-opt view)": 0.9796476825722593,
      "VeMo (max entropy view)": 0.9433290978398984,
      "VeMo (min entropy view)": 0.9796476825722593,
      "VeMo (random view)": 0.9433290978398984,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is stumbling forward."
  },
  "006332": {
    "text": "a man walks in a clockwise circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5270663553357873,
      "Minus Multimodal Distance": -10.858305931091309,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6465317205293104e-05,
      "MoBERT-F": 0.45861845971218673,
      "MoBERT-N": 0.5051712584719075,
      "MoBERT-min(F/N)": 0.45861845971218673,
      "MoBERT-max(F/N)": 0.5051712584719075,
      "MotionCritic": -11.520919799804688,
      "VeMo (human-opt view)": 0.7881944444444444,
      "VeMo (max entropy view)": 0.7881944444444444,
      "VeMo (min entropy view)": 0.8517241379310345,
      "VeMo (random view)": 0.7881944444444444,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks in a clockwise circle."
  },
  "006337": {
    "text": "a figure tip toes around while walking in a slolam like motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7084750792271828,
      "Minus Multimodal Distance": -7.602386474609375,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.030318984761834145,
      "MoBERT-F": 0.6647477117451674,
      "MoBERT-N": 0.40846436461555996,
      "MoBERT-min(F/N)": 0.40846436461555996,
      "MoBERT-max(F/N)": 0.6647477117451674,
      "MotionCritic": -12.133538246154785,
      "VeMo (human-opt view)": 0.8672897196261682,
      "VeMo (max entropy view)": 0.8267716535433071,
      "VeMo (min entropy view)": 0.8672897196261682,
      "VeMo (random view)": 0.8672897196261682,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure tiptoes around while walking in a slalom - like motion."
  },
  "006343": {
    "text": "the person is hammering a nail.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7552946707347001,
      "Minus Multimodal Distance": -12.374054908752441,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3211503503262065e-05,
      "MoBERT-F": 0.4177524603393567,
      "MoBERT-N": 0.514948014408571,
      "MoBERT-min(F/N)": 0.4177524603393567,
      "MoBERT-max(F/N)": 0.514948014408571,
      "MotionCritic": -14.884342193603516,
      "VeMo (human-opt view)": 6.5343924323682435e-06,
      "VeMo (max entropy view)": 0.00027951857988357446,
      "VeMo (min entropy view)": 6.5343924323682435e-06,
      "VeMo (random view)": 6.5343924323682435e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is hammering a nail."
  },
  "006351": {
    "text": "the person holds something up to their face.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6331357541866928,
      "Minus Multimodal Distance": -10.788604736328125,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8798807761631906e-05,
      "MoBERT-F": 0.3975065182828772,
      "MoBERT-N": 0.4721907623285091,
      "MoBERT-min(F/N)": 0.3975065182828772,
      "MoBERT-max(F/N)": 0.4721907623285091,
      "MotionCritic": -7.4943108558654785,
      "VeMo (human-opt view)": 0.9604826546003017,
      "VeMo (max entropy view)": 0.9097651421508035,
      "VeMo (min entropy view)": 0.9604826546003017,
      "VeMo (random view)": 0.9097651421508035,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person holds something up to their face."
  },
  "006371": {
    "text": "the person is doing arm gestures.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0023550294180272,
      "Minus Multimodal Distance": -10.20920467376709,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.005375269800424576,
      "MoBERT-F": 0.6035510633300476,
      "MoBERT-N": 0.6098737746537747,
      "MoBERT-min(F/N)": 0.6035510633300476,
      "MoBERT-max(F/N)": 0.6098737746537747,
      "MotionCritic": -4.4550700187683105,
      "VeMo (human-opt view)": 0.7878245299910475,
      "VeMo (max entropy view)": 0.6932849364791288,
      "VeMo (min entropy view)": 0.7878245299910475,
      "VeMo (random view)": 0.7878245299910475,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is making arm gestures."
  },
  "006383": {
    "text": "subject walks in a full circle, then side steps to turn around and walk around something to avoid running into.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8290408133890466,
      "Minus Multimodal Distance": -6.919764995574951,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00011664417979773134,
      "MoBERT-F": 0.5522362438632024,
      "MoBERT-N": 0.6169520799222747,
      "MoBERT-min(F/N)": 0.5522362438632024,
      "MoBERT-max(F/N)": 0.6169520799222747,
      "MotionCritic": -11.275294303894043,
      "VeMo (human-opt view)": 0.6932668329177057,
      "VeMo (max entropy view)": 0.59375,
      "VeMo (min entropy view)": 0.6932668329177057,
      "VeMo (random view)": 0.59375,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The subject walks in a full circle. Then, it side - steps to turn around and walks around something to avoid running into it."
  },
  "006385": {
    "text": "a person standing still shuffles to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5533763161692523,
      "Minus Multimodal Distance": -8.110984802246094,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2753187295165844e-05,
      "MoBERT-F": 0.36469000736450563,
      "MoBERT-N": 0.4974112325461973,
      "MoBERT-min(F/N)": 0.36469000736450563,
      "MoBERT-max(F/N)": 0.4974112325461973,
      "MotionCritic": -3.461428165435791,
      "VeMo (human-opt view)": 0.9466872110939908,
      "VeMo (max entropy view)": 0.9466872110939908,
      "VeMo (min entropy view)": 0.9526627218934911,
      "VeMo (random view)": 0.9466872110939908,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person standing still shuffles to the left."
  },
  "006390": {
    "text": "a person bends down and touches their toes, then reaches up and stretches back and forth",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2205914489155718,
      "Minus Multimodal Distance": -9.333662986755371,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.611425043141935e-05,
      "MoBERT-F": 0.4190843869537513,
      "MoBERT-N": 0.49042077172038867,
      "MoBERT-min(F/N)": 0.4190843869537513,
      "MoBERT-max(F/N)": 0.49042077172038867,
      "MotionCritic": -21.009483337402344,
      "VeMo (human-opt view)": 0.00909618734275208,
      "VeMo (max entropy view)": 0.07588985896574882,
      "VeMo (min entropy view)": 0.00909618734275208,
      "VeMo (random view)": 0.00909618734275208,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends down and touches their toes, then reaches up and stretches back and forth."
  },
  "006420": {
    "text": "a figure spins rapidly with arms outstretched then adjusts sleeves",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.695599380920738,
      "Minus Multimodal Distance": -9.168343544006348,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5400093363714404e-05,
      "MoBERT-F": 0.5401443219052704,
      "MoBERT-N": 0.6053336419172899,
      "MoBERT-min(F/N)": 0.5401443219052704,
      "MoBERT-max(F/N)": 0.6053336419172899,
      "MotionCritic": -10.908430099487305,
      "VeMo (human-opt view)": 0.02298648885769433,
      "VeMo (max entropy view)": 0.03739837398373984,
      "VeMo (min entropy view)": 0.02298648885769433,
      "VeMo (random view)": 0.02298648885769433,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure spins rapidly with arms outstretched and then adjusts its sleeves."
  },
  "006426": {
    "text": "person runs in a zigzag motion and ducks under an invisible object hlfway through then returns to full height.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -3.492885210599558,
      "Minus Multimodal Distance": -4.51284646987915,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4151202524080873e-05,
      "MoBERT-F": 0.3587055548187248,
      "MoBERT-N": 0.46748998985546536,
      "MoBERT-min(F/N)": 0.3587055548187248,
      "MoBERT-max(F/N)": 0.46748998985546536,
      "MotionCritic": -19.82793426513672,
      "VeMo (human-opt view)": 0.1645021645021645,
      "VeMo (max entropy view)": 0.1645021645021645,
      "VeMo (min entropy view)": 0.14022140221402213,
      "VeMo (random view)": 0.14022140221402213,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs in a zigzag motion and ducks under an invisible object halfway through, then returns to full height."
  },
  "006432": {
    "text": "a man raises his right hand to his head then lowers it back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.38745009168556804,
      "Minus Multimodal Distance": -6.35731315612793,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9608043405460194e-05,
      "MoBERT-F": 0.28166528145612885,
      "MoBERT-N": 0.45862087849004396,
      "MoBERT-min(F/N)": 0.28166528145612885,
      "MoBERT-max(F/N)": 0.45862087849004396,
      "MotionCritic": -16.06627082824707,
      "VeMo (human-opt view)": 0.9959485927396635,
      "VeMo (max entropy view)": 0.9959485927396635,
      "VeMo (min entropy view)": 0.9975344402489698,
      "VeMo (random view)": 0.9975344402489698,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man raises his right hand to his head, then lowers it back down."
  },
  "006433": {
    "text": "a person lifts both arms out to their side and runs forward in a figure 8 pattern.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6301552719084778,
      "Minus Multimodal Distance": -12.125521659851074,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.902522803400643e-05,
      "MoBERT-F": 0.3247861163524953,
      "MoBERT-N": 0.46722908868022467,
      "MoBERT-min(F/N)": 0.3247861163524953,
      "MoBERT-max(F/N)": 0.46722908868022467,
      "MotionCritic": -4.92199182510376,
      "VeMo (human-opt view)": 0.0002867094721458072,
      "VeMo (max entropy view)": 0.039644312708410526,
      "VeMo (min entropy view)": 0.0002867094721458072,
      "VeMo (random view)": 0.0002867094721458072,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts both arms out to the sides and runs forward in a figure - 8 pattern."
  },
  "006457": {
    "text": "a person catches a ball with their left arm then throws it with their right arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7106945273401221,
      "Minus Multimodal Distance": -3.438776969909668,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0031256864313036203,
      "MoBERT-F": 0.6965179369011975,
      "MoBERT-N": 0.5693638659038801,
      "MoBERT-min(F/N)": 0.5693638659038801,
      "MoBERT-max(F/N)": 0.6965179369011975,
      "MotionCritic": -4.699406623840332,
      "VeMo (human-opt view)": 0.009743654079371054,
      "VeMo (max entropy view)": 0.07585675779745861,
      "VeMo (min entropy view)": 0.009743654079371054,
      "VeMo (random view)": 0.009743654079371054,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person catches a ball with their left arm and then throws it with their right arm."
  },
  "006473": {
    "text": "the sim is walking in a circle motion.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.45968163466865,
      "Minus Multimodal Distance": -4.378310680389404,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9302619099617004,
      "MoBERT-F": 0.7396244290644634,
      "MoBERT-N": 0.5544413251028053,
      "MoBERT-min(F/N)": 0.5544413251028053,
      "MoBERT-max(F/N)": 0.7396244290644634,
      "MotionCritic": -8.575992584228516,
      "VeMo (human-opt view)": 0.7058823529411765,
      "VeMo (max entropy view)": 0.7058823529411765,
      "VeMo (min entropy view)": 0.7303370786516854,
      "VeMo (random view)": 0.7058823529411765,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking in a circular motion."
  },
  "006514": {
    "text": "a person puts their hands on their knee, then gets up and walks towards the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.074621632083517,
      "Minus Multimodal Distance": -6.338169574737549,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.5962431866209954e-05,
      "MoBERT-F": 0.3165864999133717,
      "MoBERT-N": 0.38708490348344937,
      "MoBERT-min(F/N)": 0.3165864999133717,
      "MoBERT-max(F/N)": 0.38708490348344937,
      "MotionCritic": 2.330653667449951,
      "VeMo (human-opt view)": 0.6227544910179641,
      "VeMo (max entropy view)": 0.6227544910179641,
      "VeMo (min entropy view)": 0.36324167872648333,
      "VeMo (random view)": 0.6227544910179641,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person puts their hands on their knees, then gets up and walks towards the right."
  },
  "006518": {
    "text": "a person boastfully meanders across a room in a confident manner.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1235313170157926,
      "Minus Multimodal Distance": -4.7654805183410645,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.334554483240936e-05,
      "MoBERT-F": 0.3770572715990965,
      "MoBERT-N": 0.4484156066654902,
      "MoBERT-min(F/N)": 0.3770572715990965,
      "MoBERT-max(F/N)": 0.4484156066654902,
      "MotionCritic": -7.798649787902832,
      "VeMo (human-opt view)": 0.035175879396984924,
      "VeMo (max entropy view)": 0.1191827468785471,
      "VeMo (min entropy view)": 0.035175879396984924,
      "VeMo (random view)": 0.035175879396984924,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person boastfully meanders across a room in a confident manner."
  },
  "006521": {
    "text": "moving hands in a random pattern.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0247233891379426,
      "Minus Multimodal Distance": -9.225631713867188,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00020288846280891448,
      "MoBERT-F": 0.5336160556457074,
      "MoBERT-N": 0.49384491545852377,
      "MoBERT-min(F/N)": 0.49384491545852377,
      "MoBERT-max(F/N)": 0.5336160556457074,
      "MotionCritic": -6.9410176277160645,
      "VeMo (human-opt view)": 0.8080808080808081,
      "VeMo (max entropy view)": 0.8080808080808081,
      "VeMo (min entropy view)": 0.8350045578851413,
      "VeMo (random view)": 0.8350045578851413,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving hands in a random pattern."
  },
  "006567": {
    "text": "a person sits on a chair behind them and then stands back up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0441795320479363,
      "Minus Multimodal Distance": -2.3910419940948486,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7173560738447122e-05,
      "MoBERT-F": 0.38828983194944866,
      "MoBERT-N": 0.44283754563255384,
      "MoBERT-min(F/N)": 0.38828983194944866,
      "MoBERT-max(F/N)": 0.44283754563255384,
      "MotionCritic": -7.290622234344482,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.3073770491803279,
      "VeMo (random view)": 0.3073770491803279,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits on a chair behind them and then stands back up."
  },
  "006568": {
    "text": "a person wipes down a tabletop.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4125812400030432,
      "Minus Multimodal Distance": -4.9189534187316895,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3200662326416932e-05,
      "MoBERT-F": 0.5179780693431728,
      "MoBERT-N": 0.5119913739818445,
      "MoBERT-min(F/N)": 0.5119913739818445,
      "MoBERT-max(F/N)": 0.5179780693431728,
      "MotionCritic": -8.430827140808105,
      "VeMo (human-opt view)": 0.0021746622143527707,
      "VeMo (max entropy view)": 0.025915261209378856,
      "VeMo (min entropy view)": 0.0021746622143527707,
      "VeMo (random view)": 0.0021746622143527707,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person wipes down a tabletop."
  },
  "006570": {
    "text": "a person stands still with their arms stretched out.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.41860475578830675,
      "Minus Multimodal Distance": -12.072665214538574,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.42125697695883e-05,
      "MoBERT-F": 0.38917658195326577,
      "MoBERT-N": 0.48958661590668523,
      "MoBERT-min(F/N)": 0.38917658195326577,
      "MoBERT-max(F/N)": 0.48958661590668523,
      "MotionCritic": -3.433140993118286,
      "VeMo (human-opt view)": 0.9986742453103332,
      "VeMo (max entropy view)": 0.9979559978956265,
      "VeMo (min entropy view)": 0.9986742453103332,
      "VeMo (random view)": 0.9986742453103332,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still with their arms stretched out."
  },
  "006576": {
    "text": "person is getting into a crouch position.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.9132110758369265,
      "Minus Multimodal Distance": -11.068317413330078,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4201050109695643e-05,
      "MoBERT-F": 0.42649859798754197,
      "MoBERT-N": 0.4491159555573753,
      "MoBERT-min(F/N)": 0.42649859798754197,
      "MoBERT-max(F/N)": 0.4491159555573753,
      "MotionCritic": -6.907431125640869,
      "VeMo (human-opt view)": 0.9979512054664053,
      "VeMo (max entropy view)": 0.9973668654951905,
      "VeMo (min entropy view)": 0.9979512054664053,
      "VeMo (random view)": 0.9979512054664053,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is getting into a crouching position."
  },
  "006577": {
    "text": "the person is stopping with their left foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9145013840415588,
      "Minus Multimodal Distance": -3.005173683166504,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6234814868075773e-05,
      "MoBERT-F": 0.3996978048147183,
      "MoBERT-N": 0.5674697990121522,
      "MoBERT-min(F/N)": 0.3996978048147183,
      "MoBERT-max(F/N)": 0.5674697990121522,
      "MotionCritic": -13.413345336914062,
      "VeMo (human-opt view)": 0.962800875273523,
      "VeMo (max entropy view)": 0.9096651926721415,
      "VeMo (min entropy view)": 0.962800875273523,
      "VeMo (random view)": 0.962800875273523,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is stopping with their left foot."
  },
  "006610": {
    "text": "a person stomps the ground with their left foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5525185715054943,
      "Minus Multimodal Distance": -7.438714027404785,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.315486199222505e-05,
      "MoBERT-F": 0.35645004420295,
      "MoBERT-N": 0.5396666686000535,
      "MoBERT-min(F/N)": 0.35645004420295,
      "MoBERT-max(F/N)": 0.5396666686000535,
      "MotionCritic": -3.737668752670288,
      "VeMo (human-opt view)": 0.9239960172585463,
      "VeMo (max entropy view)": 0.835820895522388,
      "VeMo (min entropy view)": 0.9239960172585463,
      "VeMo (random view)": 0.835820895522388,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stomps the ground with their left foot."
  },
  "006640": {
    "text": "a person grabbed something and throw it away",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1533043891536132,
      "Minus Multimodal Distance": -5.358552932739258,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8579554557800293,
      "MoBERT-F": 0.6260602180964114,
      "MoBERT-N": 0.535780373216384,
      "MoBERT-min(F/N)": 0.535780373216384,
      "MoBERT-max(F/N)": 0.6260602180964114,
      "MotionCritic": -5.495578765869141,
      "VeMo (human-opt view)": 0.6927592954990215,
      "VeMo (max entropy view)": 0.6794625719769674,
      "VeMo (min entropy view)": 0.6927592954990215,
      "VeMo (random view)": 0.6927592954990215,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person grabbed something and threw it away."
  },
  "006648": {
    "text": "the person fills the glass with water then pours it into the plant pot",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1417476063155576,
      "Minus Multimodal Distance": -5.065963268280029,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.371824641362764e-05,
      "MoBERT-F": 0.47547416458910297,
      "MoBERT-N": 0.47957732057619457,
      "MoBERT-min(F/N)": 0.47547416458910297,
      "MoBERT-max(F/N)": 0.47957732057619457,
      "MotionCritic": -7.459640026092529,
      "VeMo (human-opt view)": 1.625910103180255e-05,
      "VeMo (max entropy view)": 2.3487696647311317e-05,
      "VeMo (min entropy view)": 1.625910103180255e-05,
      "VeMo (random view)": 2.3487696647311317e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person fills the glass with water, then pours it into the plant pot."
  },
  "006652": {
    "text": "the man is doing starjumps",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.424698371059253,
      "Minus Multimodal Distance": -8.174324989318848,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7351520657539368,
      "MoBERT-F": 0.804477362598347,
      "MoBERT-N": 0.7850354173522225,
      "MoBERT-min(F/N)": 0.7850354173522225,
      "MoBERT-max(F/N)": 0.804477362598347,
      "MotionCritic": -4.10965633392334,
      "VeMo (human-opt view)": 0.12558139534883722,
      "VeMo (max entropy view)": 0.45336225596529284,
      "VeMo (min entropy view)": 0.12558139534883722,
      "VeMo (random view)": 0.12558139534883722,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is doing star jumps."
  },
  "006658": {
    "text": "the person walks backwards in a straight line",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9572997583624345,
      "Minus Multimodal Distance": -12.953248977661133,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.6778334379196167,
      "MoBERT-F": 0.557172972044402,
      "MoBERT-N": 0.491382227471092,
      "MoBERT-min(F/N)": 0.491382227471092,
      "MoBERT-max(F/N)": 0.557172972044402,
      "MotionCritic": -6.3664231300354,
      "VeMo (human-opt view)": 0.7980376124284546,
      "VeMo (max entropy view)": 0.7980376124284546,
      "VeMo (min entropy view)": 0.9820419863417924,
      "VeMo (random view)": 0.9820419863417924,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walks backwards in a straight line."
  },
  "006662": {
    "text": "a man is shot in the chest, falls to the ground, crawls and pulls himself up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.207202332250884,
      "Minus Multimodal Distance": -7.741550922393799,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.2143067073775455e-05,
      "MoBERT-F": 0.5611680453440142,
      "MoBERT-N": 0.5468132581778642,
      "MoBERT-min(F/N)": 0.5468132581778642,
      "MoBERT-max(F/N)": 0.5611680453440142,
      "MotionCritic": -14.317949295043945,
      "VeMo (human-opt view)": 0.5623003194888179,
      "VeMo (max entropy view)": 0.46860986547085204,
      "VeMo (min entropy view)": 0.5623003194888179,
      "VeMo (random view)": 0.5623003194888179,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man is shot in the chest, falls to the ground, crawls, and pulls himself up."
  },
  "006674": {
    "text": "walking forward in a diagonal line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3597147670652014,
      "Minus Multimodal Distance": -2.836651563644409,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 9.881925507215783e-05,
      "MoBERT-F": 0.5999790054481735,
      "MoBERT-N": 0.5560146181940497,
      "MoBERT-min(F/N)": 0.5560146181940497,
      "MoBERT-max(F/N)": 0.5999790054481735,
      "MotionCritic": -18.05869483947754,
      "VeMo (human-opt view)": 0.960256068284876,
      "VeMo (max entropy view)": 0.9400868306801736,
      "VeMo (min entropy view)": 0.960256068284876,
      "VeMo (random view)": 0.9400868306801736,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking forward in a diagonal line."
  },
  "006687": {
    "text": "a person raises their left hand above their head and motions downward. as if throwing an object toward the ground",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7971805595311472,
      "Minus Multimodal Distance": -10.305132865905762,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9933564066886902,
      "MoBERT-F": 0.7199943626367027,
      "MoBERT-N": 0.6821479334528303,
      "MoBERT-min(F/N)": 0.6821479334528303,
      "MoBERT-max(F/N)": 0.7199943626367027,
      "MotionCritic": -3.2474300861358643,
      "VeMo (human-opt view)": 0.9399545602077247,
      "VeMo (max entropy view)": 0.9399545602077247,
      "VeMo (min entropy view)": 0.9399545602077247,
      "VeMo (random view)": 0.9399545602077247,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person raises their left hand above their head and motions downward, as if throwing an object toward the ground."
  },
  "006692": {
    "text": "a person stands with their arms stretched out then sits down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2139614420743847,
      "Minus Multimodal Distance": -5.96055269241333,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3404327293974347e-05,
      "MoBERT-F": 0.4115812135944914,
      "MoBERT-N": 0.4518634168771281,
      "MoBERT-min(F/N)": 0.4115812135944914,
      "MoBERT-max(F/N)": 0.4518634168771281,
      "MotionCritic": -12.37054443359375,
      "VeMo (human-opt view)": 0.9985933117583603,
      "VeMo (max entropy view)": 0.9978183072478766,
      "VeMo (min entropy view)": 0.9985933117583603,
      "VeMo (random view)": 0.9978183072478766,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands with their arms stretched out, then sits down."
  },
  "006693": {
    "text": "a person runs forward to throw with the right arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.101983843544635,
      "Minus Multimodal Distance": -3.3879785537719727,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2604712285101414e-05,
      "MoBERT-F": 0.46714057876121473,
      "MoBERT-N": 0.5104652572917501,
      "MoBERT-min(F/N)": 0.46714057876121473,
      "MoBERT-max(F/N)": 0.5104652572917501,
      "MotionCritic": -7.60597562789917,
      "VeMo (human-opt view)": 5.314576649396007e-05,
      "VeMo (max entropy view)": 0.005199120973361205,
      "VeMo (min entropy view)": 5.314576649396007e-05,
      "VeMo (random view)": 5.314576649396007e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs forward and throws with the right arm."
  },
  "006705": {
    "text": "a person walks straight slowly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9236200463411073,
      "Minus Multimodal Distance": -10.486412048339844,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.302145367139019e-05,
      "MoBERT-F": 0.41795767087183217,
      "MoBERT-N": 0.5763000240198686,
      "MoBERT-min(F/N)": 0.41795767087183217,
      "MoBERT-max(F/N)": 0.5763000240198686,
      "MotionCritic": -1.1586247682571411,
      "VeMo (human-opt view)": 0.9928709142267856,
      "VeMo (max entropy view)": 0.9928709142267856,
      "VeMo (min entropy view)": 0.9937030966962822,
      "VeMo (random view)": 0.9928709142267856,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks straight and slowly."
  },
  "006712": {
    "text": "a person stands in a defensive stance with right arm and leg forward, then uses the right forearm for a block across the body.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6083473196783511,
      "Minus Multimodal Distance": -7.466557502746582,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0001794213749235496,
      "MoBERT-F": 0.4300709077086247,
      "MoBERT-N": 0.5114508154993013,
      "MoBERT-min(F/N)": 0.4300709077086247,
      "MoBERT-max(F/N)": 0.5114508154993013,
      "MotionCritic": -0.9680661559104919,
      "VeMo (human-opt view)": 0.8356545961002786,
      "VeMo (max entropy view)": 0.7047970479704797,
      "VeMo (min entropy view)": 0.8356545961002786,
      "VeMo (random view)": 0.8356545961002786,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands in a defensive stance with the right arm and leg forward, then uses the right forearm to block across the body."
  },
  "006718": {
    "text": "a person moves into a fighting stance.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0614370309467684,
      "Minus Multimodal Distance": -6.1644206047058105,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00010319162538507953,
      "MoBERT-F": 0.5563479234022769,
      "MoBERT-N": 0.5236677739846779,
      "MoBERT-min(F/N)": 0.5236677739846779,
      "MoBERT-max(F/N)": 0.5563479234022769,
      "MotionCritic": -7.449256420135498,
      "VeMo (human-opt view)": 0.9284750337381916,
      "VeMo (max entropy view)": 0.8874868559411146,
      "VeMo (min entropy view)": 0.9284750337381916,
      "VeMo (random view)": 0.9284750337381916,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves into a fighting stance."
  },
  "006726": {
    "text": "slowly wake on the left side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3819165946973144,
      "Minus Multimodal Distance": -9.489204406738281,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3006597984931432e-05,
      "MoBERT-F": 0.4120352018657746,
      "MoBERT-N": 0.5402757372798406,
      "MoBERT-min(F/N)": 0.4120352018657746,
      "MoBERT-max(F/N)": 0.5402757372798406,
      "MotionCritic": -2.916982889175415,
      "VeMo (human-opt view)": 0.37735849056603776,
      "VeMo (max entropy view)": 0.37735849056603776,
      "VeMo (min entropy view)": 0.23371647509578544,
      "VeMo (random view)": 0.23371647509578544,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person slowly wakes on the left side."
  },
  "006735": {
    "text": "the man makes a gesture and ties his laces",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.064638064793031,
      "Minus Multimodal Distance": -10.334975242614746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0020582028664648533,
      "MoBERT-F": 0.4949247353847728,
      "MoBERT-N": 0.49210995303526656,
      "MoBERT-min(F/N)": 0.49210995303526656,
      "MoBERT-max(F/N)": 0.4949247353847728,
      "MotionCritic": -4.979464530944824,
      "VeMo (human-opt view)": 0.07165504121750159,
      "VeMo (max entropy view)": 0.07165504121750159,
      "VeMo (min entropy view)": 0.04452882292026234,
      "VeMo (random view)": 0.04452882292026234,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man makes a gesture and ties his laces."
  },
  "006741": {
    "text": "a person does jumping jacks, stumbles and then backs up to continue.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.412360986667973,
      "Minus Multimodal Distance": -7.024960517883301,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9701792001724243,
      "MoBERT-F": 0.7557238514692742,
      "MoBERT-N": 0.6310166902679022,
      "MoBERT-min(F/N)": 0.6310166902679022,
      "MoBERT-max(F/N)": 0.7557238514692742,
      "MotionCritic": -5.157405376434326,
      "VeMo (human-opt view)": 0.8181818181818182,
      "VeMo (max entropy view)": 0.7975708502024291,
      "VeMo (min entropy view)": 0.8181818181818182,
      "VeMo (random view)": 0.8181818181818182,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does jumping jacks, stumbles, and then backs up to continue."
  },
  "006755": {
    "text": "a person moves to the left side and then to the right side, then stops",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5000152325374552,
      "Minus Multimodal Distance": -4.5106048583984375,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.6056109070777893,
      "MoBERT-F": 0.612763456515987,
      "MoBERT-N": 0.6039183104731052,
      "MoBERT-min(F/N)": 0.6039183104731052,
      "MoBERT-max(F/N)": 0.612763456515987,
      "MotionCritic": -2.2822508811950684,
      "VeMo (human-opt view)": 0.8992460589444825,
      "VeMo (max entropy view)": 0.8992460589444825,
      "VeMo (min entropy view)": 0.9049773755656109,
      "VeMo (random view)": 0.9049773755656109,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves to the left side, then to the right side, and then stops."
  },
  "006759": {
    "text": "a figure steps backward slowly and carefully",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7845268963195359,
      "Minus Multimodal Distance": -4.798918724060059,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.012189934961497784,
      "MoBERT-F": 0.6034287507894907,
      "MoBERT-N": 0.5887003365565582,
      "MoBERT-min(F/N)": 0.5887003365565582,
      "MoBERT-max(F/N)": 0.6034287507894907,
      "MotionCritic": -5.66367244720459,
      "VeMo (human-opt view)": 0.6798029556650246,
      "VeMo (max entropy view)": 0.6798029556650246,
      "VeMo (min entropy view)": 0.9098712446351931,
      "VeMo (random view)": 0.9098712446351931,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure steps backward slowly and carefully."
  },
  "006765": {
    "text": "a standing person seems to be shoved from behind, then regains their balance after a quick stumble.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.719641298804426,
      "Minus Multimodal Distance": -6.968874931335449,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.313027471245732e-05,
      "MoBERT-F": 0.4776322359903675,
      "MoBERT-N": 0.6176434873948842,
      "MoBERT-min(F/N)": 0.4776322359903675,
      "MoBERT-max(F/N)": 0.6176434873948842,
      "MotionCritic": -4.430243015289307,
      "VeMo (human-opt view)": 0.9669621273166801,
      "VeMo (max entropy view)": 0.9285404937202252,
      "VeMo (min entropy view)": 0.9669621273166801,
      "VeMo (random view)": 0.9669621273166801,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A standing person seems to be shoved from behind. Then, they regain their balance after a quick stumble."
  },
  "006774": {
    "text": "a person raised the hand and start to make some pushs",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1755280795663456,
      "Minus Multimodal Distance": -9.573894500732422,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3530481485067867e-05,
      "MoBERT-F": 0.37234558601190015,
      "MoBERT-N": 0.47103389282425007,
      "MoBERT-min(F/N)": 0.37234558601190015,
      "MoBERT-max(F/N)": 0.47103389282425007,
      "MotionCritic": -14.877302169799805,
      "VeMo (human-opt view)": 0.9604802401200601,
      "VeMo (max entropy view)": 0.9497427779976256,
      "VeMo (min entropy view)": 0.9604802401200601,
      "VeMo (random view)": 0.9497427779976256,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raised the hand and started to make some pushes."
  },
  "006784": {
    "text": "a person picks a drink up with their right hand and finishes it.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6232040145089309,
      "Minus Multimodal Distance": -12.2919921875,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.008776133763604e-05,
      "MoBERT-F": 0.37740776034464896,
      "MoBERT-N": 0.4566372516739518,
      "MoBERT-min(F/N)": 0.37740776034464896,
      "MoBERT-max(F/N)": 0.4566372516739518,
      "MotionCritic": -8.951457977294922,
      "VeMo (human-opt view)": 1.1499245030335893e-05,
      "VeMo (max entropy view)": 1.7744456678880626e-05,
      "VeMo (min entropy view)": 1.1499245030335893e-05,
      "VeMo (random view)": 1.7744456678880626e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks up a drink with their right hand and finishes it."
  },
  "006819": {
    "text": "person brings left forearm up to their eyesight like they are checking the time on a watch.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6143644866240418,
      "Minus Multimodal Distance": -7.833598613739014,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8676633519353345e-05,
      "MoBERT-F": 0.33919300341858,
      "MoBERT-N": 0.5039923289909108,
      "MoBERT-min(F/N)": 0.33919300341858,
      "MoBERT-max(F/N)": 0.5039923289909108,
      "MotionCritic": -7.388142108917236,
      "VeMo (human-opt view)": 0.7306122448979592,
      "VeMo (max entropy view)": 0.7306122448979592,
      "VeMo (min entropy view)": 0.7436743674367436,
      "VeMo (random view)": 0.7436743674367436,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person brings their left forearm up to eye level as if they are checking the time on a watch."
  },
  "006836": {
    "text": "the person turns on the faucet to wash the dishes",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8916076347237569,
      "Minus Multimodal Distance": -10.493972778320312,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.001765561057254672,
      "MoBERT-F": 0.5178548038078535,
      "MoBERT-N": 0.564675312946875,
      "MoBERT-min(F/N)": 0.5178548038078535,
      "MoBERT-max(F/N)": 0.564675312946875,
      "MotionCritic": -8.13967514038086,
      "VeMo (human-opt view)": 8.954419714853531e-06,
      "VeMo (max entropy view)": 4.995436177868462e-05,
      "VeMo (min entropy view)": 8.954419714853531e-06,
      "VeMo (random view)": 8.954419714853531e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person turns on the faucet to wash the dishes."
  },
  "006839": {
    "text": "a person walks around and then crouches down with their arms forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5334641335961388,
      "Minus Multimodal Distance": -9.25844955444336,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0009506435017101467,
      "MoBERT-F": 0.5241322226576645,
      "MoBERT-N": 0.40764690680094245,
      "MoBERT-min(F/N)": 0.40764690680094245,
      "MoBERT-max(F/N)": 0.5241322226576645,
      "MotionCritic": -14.548303604125977,
      "VeMo (human-opt view)": 0.9966075301707358,
      "VeMo (max entropy view)": 0.9859212405631503,
      "VeMo (min entropy view)": 0.9966075301707358,
      "VeMo (random view)": 0.9966075301707358,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks around and then crouches down with their arms forward."
  },
  "006844": {
    "text": "a person falls to their knees.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0245621509591403,
      "Minus Multimodal Distance": -7.410553455352783,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3306884031626396e-05,
      "MoBERT-F": 0.4115448470457509,
      "MoBERT-N": 0.5355599511366972,
      "MoBERT-min(F/N)": 0.4115448470457509,
      "MoBERT-max(F/N)": 0.5355599511366972,
      "MotionCritic": -5.689792156219482,
      "VeMo (human-opt view)": 0.9979594944419065,
      "VeMo (max entropy view)": 0.9979594944419065,
      "VeMo (min entropy view)": 0.9990908486152072,
      "VeMo (random view)": 0.9979594944419065,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person falls to their knees."
  },
  "006853": {
    "text": "sitting down and crossing legs then sitting on the floor.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.506195699870086,
      "Minus Multimodal Distance": -3.16580867767334,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4169083189917728e-05,
      "MoBERT-F": 0.36914199554703586,
      "MoBERT-N": 0.47679478992715196,
      "MoBERT-min(F/N)": 0.36914199554703586,
      "MoBERT-max(F/N)": 0.47679478992715196,
      "MotionCritic": -3.95737361907959,
      "VeMo (human-opt view)": 0.9283516483516484,
      "VeMo (max entropy view)": 0.8170212765957446,
      "VeMo (min entropy view)": 0.9283516483516484,
      "VeMo (random view)": 0.8170212765957446,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is sitting down, crossing their legs, and then sitting on the floor."
  },
  "006912": {
    "text": "a person walks, stepping onto a big object.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4200552709328715,
      "Minus Multimodal Distance": -4.848074436187744,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3365055312751792e-05,
      "MoBERT-F": 0.45990791399520875,
      "MoBERT-N": 0.5768562877682135,
      "MoBERT-min(F/N)": 0.45990791399520875,
      "MoBERT-max(F/N)": 0.5768562877682135,
      "MotionCritic": -7.587971210479736,
      "VeMo (human-opt view)": 0.14788004136504654,
      "VeMo (max entropy view)": 0.14788004136504654,
      "VeMo (min entropy view)": 0.10687022900763359,
      "VeMo (random view)": 0.14788004136504654,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks and steps onto a big object."
  },
  "006967": {
    "text": "a person standing in a square, walks forward in a diagonal pattern.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1343574484090646,
      "Minus Multimodal Distance": -8.916833877563477,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.4583063097670674e-05,
      "MoBERT-F": 0.5661037097363851,
      "MoBERT-N": 0.5959872028710487,
      "MoBERT-min(F/N)": 0.5661037097363851,
      "MoBERT-max(F/N)": 0.5959872028710487,
      "MotionCritic": -18.05869483947754,
      "VeMo (human-opt view)": 0.9580041580041581,
      "VeMo (max entropy view)": 0.9281553398058252,
      "VeMo (min entropy view)": 0.9580041580041581,
      "VeMo (random view)": 0.9281553398058252,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing in a square walks forward in a diagonal pattern."
  },
  "006980": {
    "text": "a person walks forward, steps over something with his right leg, and then he continues walking forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8926289207692502,
      "Minus Multimodal Distance": -6.609996795654297,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.6683275538962334e-05,
      "MoBERT-F": 0.46625716842126685,
      "MoBERT-N": 0.6118587462201368,
      "MoBERT-min(F/N)": 0.46625716842126685,
      "MoBERT-max(F/N)": 0.6118587462201368,
      "MotionCritic": -6.327281951904297,
      "VeMo (human-opt view)": 0.9552797939592601,
      "VeMo (max entropy view)": 0.9433106575963719,
      "VeMo (min entropy view)": 0.9552797939592601,
      "VeMo (random view)": 0.9433106575963719,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, steps over something with his right leg, and then continues walking forward."
  },
  "006987": {
    "text": "a person tapping on a surface",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5888843520934642,
      "Minus Multimodal Distance": -10.673855781555176,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.983015969628468e-05,
      "MoBERT-F": 0.5014797190333483,
      "MoBERT-N": 0.5522146247309181,
      "MoBERT-min(F/N)": 0.5014797190333483,
      "MoBERT-max(F/N)": 0.5522146247309181,
      "MotionCritic": -8.13561725616455,
      "VeMo (human-opt view)": 0.5307443365695793,
      "VeMo (max entropy view)": 0.5307443365695793,
      "VeMo (min entropy view)": 0.6653919694072657,
      "VeMo (random view)": 0.6653919694072657,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is tapping on a surface."
  },
  "007001": {
    "text": "i person side steps to his right, sliding his feet across the ground.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0799169361284577,
      "Minus Multimodal Distance": -7.191921234130859,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.013608349487185478,
      "MoBERT-F": 0.6681747011357899,
      "MoBERT-N": 0.7415714360058967,
      "MoBERT-min(F/N)": 0.6681747011357899,
      "MoBERT-max(F/N)": 0.7415714360058967,
      "MotionCritic": -9.743096351623535,
      "VeMo (human-opt view)": 0.9363519863306279,
      "VeMo (max entropy view)": 0.928598701794578,
      "VeMo (min entropy view)": 0.9363519863306279,
      "VeMo (random view)": 0.928598701794578,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person side - steps to his right, sliding his feet across the ground."
  },
  "007020": {
    "text": "a man walks unsteadily forward then turns around and walks back",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0710582964329143,
      "Minus Multimodal Distance": -4.950761795043945,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.2784890208859e-05,
      "MoBERT-F": 0.46322412831542714,
      "MoBERT-N": 0.5512028168551959,
      "MoBERT-min(F/N)": 0.46322412831542714,
      "MoBERT-max(F/N)": 0.5512028168551959,
      "MotionCritic": -11.876031875610352,
      "VeMo (human-opt view)": 0.9554615576013934,
      "VeMo (max entropy view)": 0.8736196319018404,
      "VeMo (min entropy view)": 0.9554615576013934,
      "VeMo (random view)": 0.9554615576013934,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks unsteadily forward, then turns around and walks back."
  },
  "007027": {
    "text": "a person appears to scratch their head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5435385193055884,
      "Minus Multimodal Distance": -2.342442035675049,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.772012835019268e-05,
      "MoBERT-F": 0.3416756220184919,
      "MoBERT-N": 0.45481148997968196,
      "MoBERT-min(F/N)": 0.3416756220184919,
      "MoBERT-max(F/N)": 0.45481148997968196,
      "MotionCritic": -5.717113494873047,
      "VeMo (human-opt view)": 0.00011616745887163389,
      "VeMo (max entropy view)": 0.0038108528796092535,
      "VeMo (min entropy view)": 0.00011616745887163389,
      "VeMo (random view)": 0.00011616745887163389,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person appears to scratch their head."
  },
  "007037": {
    "text": "man walks in a circular motion then stops right before completing the circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2638480872281788,
      "Minus Multimodal Distance": -12.270200729370117,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0008458101074211299,
      "MoBERT-F": 0.5546584830756287,
      "MoBERT-N": 0.5549311308462338,
      "MoBERT-min(F/N)": 0.5546584830756287,
      "MoBERT-max(F/N)": 0.5549311308462338,
      "MotionCritic": -10.724161148071289,
      "VeMo (human-opt view)": 0.8736616702355461,
      "VeMo (max entropy view)": 0.8736616702355461,
      "VeMo (min entropy view)": 0.9198012775017743,
      "VeMo (random view)": 0.8736616702355461,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks in a circular motion and then stops right before completing the circle."
  },
  "007063": {
    "text": "a person in a t shape, bends down to the right and back up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4612476899531797,
      "Minus Multimodal Distance": -4.810019493103027,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4243507141363807e-05,
      "MoBERT-F": 0.5121937744700684,
      "MoBERT-N": 0.5512725737536391,
      "MoBERT-min(F/N)": 0.5121937744700684,
      "MoBERT-max(F/N)": 0.5512725737536391,
      "MotionCritic": -9.742326736450195,
      "VeMo (human-opt view)": 0.19230769230769232,
      "VeMo (max entropy view)": 0.2336065573770492,
      "VeMo (min entropy view)": 0.19230769230769232,
      "VeMo (random view)": 0.19230769230769232,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person in a T - shape bends down to the right and then straightens back up."
  },
  "007089": {
    "text": "a person laying face down on the ground and then slowly crawling backwards",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6820607531111567,
      "Minus Multimodal Distance": -3.9044458866119385,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.034214023500681e-05,
      "MoBERT-F": 0.4015331877668364,
      "MoBERT-N": 0.4200475472846266,
      "MoBERT-min(F/N)": 0.4015331877668364,
      "MoBERT-max(F/N)": 0.4200475472846266,
      "MotionCritic": -6.10911226272583,
      "VeMo (human-opt view)": 0.9796893667861409,
      "VeMo (max entropy view)": 0.9784172661870504,
      "VeMo (min entropy view)": 0.9796893667861409,
      "VeMo (random view)": 0.9796893667861409,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is lying face down on the ground and then slowly crawling backwards."
  },
  "007095": {
    "text": "a person walks around obstacles.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5414919301757701,
      "Minus Multimodal Distance": -5.701579570770264,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.07978598028421402,
      "MoBERT-F": 0.6676381225677348,
      "MoBERT-N": 0.5541871772955107,
      "MoBERT-min(F/N)": 0.5541871772955107,
      "MoBERT-max(F/N)": 0.6676381225677348,
      "MotionCritic": -14.40617561340332,
      "VeMo (human-opt view)": 0.03741029164757978,
      "VeMo (max entropy view)": 0.09517923362175525,
      "VeMo (min entropy view)": 0.03741029164757978,
      "VeMo (random view)": 0.03741029164757978,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks around obstacles."
  },
  "007097": {
    "text": "person was walking down the path around things.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6499216576010602,
      "Minus Multimodal Distance": -6.698602199554443,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0049120294861495495,
      "MoBERT-F": 0.7016263604710504,
      "MoBERT-N": 0.5996689711111582,
      "MoBERT-min(F/N)": 0.5996689711111582,
      "MoBERT-max(F/N)": 0.7016263604710504,
      "MotionCritic": -6.299108028411865,
      "VeMo (human-opt view)": 0.7987421383647799,
      "VeMo (max entropy view)": 0.7777777777777778,
      "VeMo (min entropy view)": 0.7987421383647799,
      "VeMo (random view)": 0.7777777777777778,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person was walking down the path around some things."
  },
  "007121": {
    "text": "a person walks up four steps with their hands by their sides and their lean forward slightly as they go up the stairs and once they've stopped going up the stairs, they straighten up again",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0647766165544785,
      "Minus Multimodal Distance": -6.109142303466797,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.645099786808714e-05,
      "MoBERT-F": 0.5166674290568831,
      "MoBERT-N": 0.5025302598864438,
      "MoBERT-min(F/N)": 0.5025302598864438,
      "MoBERT-max(F/N)": 0.5166674290568831,
      "MotionCritic": -13.181198120117188,
      "VeMo (human-opt view)": 0.013215859030837005,
      "VeMo (max entropy view)": 0.027596899224806203,
      "VeMo (min entropy view)": 0.013215859030837005,
      "VeMo (random view)": 0.027596899224806203,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks up four steps with their hands by their sides. They lean forward slightly as they go up the stairs. Once they stop going up the stairs, they straighten up again."
  },
  "007126": {
    "text": "a person runs forward and stops short.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6725571321025332,
      "Minus Multimodal Distance": -9.143678665161133,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.657617708086036e-05,
      "MoBERT-F": 0.43787522499739,
      "MoBERT-N": 0.529831748186538,
      "MoBERT-min(F/N)": 0.43787522499739,
      "MoBERT-max(F/N)": 0.529831748186538,
      "MotionCritic": -13.984857559204102,
      "VeMo (human-opt view)": 0.7659574468085106,
      "VeMo (max entropy view)": 0.48534201954397393,
      "VeMo (min entropy view)": 0.7659574468085106,
      "VeMo (random view)": 0.48534201954397393,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs forward and stops short."
  },
  "007134": {
    "text": "the figure is walking in a counter clockwise motion slowly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4464455953556579,
      "Minus Multimodal Distance": -13.05284309387207,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.8544232160784304e-05,
      "MoBERT-F": 0.5251475541374817,
      "MoBERT-N": 0.5015553343290526,
      "MoBERT-min(F/N)": 0.5015553343290526,
      "MoBERT-max(F/N)": 0.5251475541374817,
      "MotionCritic": -9.546427726745605,
      "VeMo (human-opt view)": 0.8990578734858681,
      "VeMo (max entropy view)": 0.8990578734858681,
      "VeMo (min entropy view)": 0.8990936555891239,
      "VeMo (random view)": 0.8990936555891239,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure is walking in a counter - clockwise motion slowly."
  },
  "007157": {
    "text": "a person makes several hand gestures and appears to move objects around.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7939888595048668,
      "Minus Multimodal Distance": -11.237245559692383,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4356106223422103e-05,
      "MoBERT-F": 0.4490741581325421,
      "MoBERT-N": 0.522728961471203,
      "MoBERT-min(F/N)": 0.4490741581325421,
      "MoBERT-max(F/N)": 0.522728961471203,
      "MotionCritic": -8.824295043945312,
      "VeMo (human-opt view)": 0.7312883435582822,
      "VeMo (max entropy view)": 0.6519174041297935,
      "VeMo (min entropy view)": 0.7312883435582822,
      "VeMo (random view)": 0.7312883435582822,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person makes several hand gestures and appears to move objects around."
  },
  "007199": {
    "text": "it is a person walking backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9707733473249295,
      "Minus Multimodal Distance": -10.138113975524902,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.016299527138471603,
      "MoBERT-F": 0.646962810932831,
      "MoBERT-N": 0.5938756914791834,
      "MoBERT-min(F/N)": 0.5938756914791834,
      "MoBERT-max(F/N)": 0.646962810932831,
      "MotionCritic": -6.369789123535156,
      "VeMo (human-opt view)": 0.7058823529411765,
      "VeMo (max entropy view)": 0.7058823529411765,
      "VeMo (min entropy view)": 0.9724957555178269,
      "VeMo (random view)": 0.9724957555178269,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "It is a person walking backwards."
  },
  "007216": {
    "text": "a person looks to be petting a dog with right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6262784634604237,
      "Minus Multimodal Distance": -11.273757934570312,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.982231282861903e-05,
      "MoBERT-F": 0.377344456777832,
      "MoBERT-N": 0.36652226718828246,
      "MoBERT-min(F/N)": 0.36652226718828246,
      "MoBERT-max(F/N)": 0.377344456777832,
      "MotionCritic": -6.763312816619873,
      "VeMo (human-opt view)": 1.4374014965339828e-05,
      "VeMo (max entropy view)": 0.00019137878675017046,
      "VeMo (min entropy view)": 1.4374014965339828e-05,
      "VeMo (random view)": 0.00019137878675017046,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person looks to be petting a dog with their right hand."
  },
  "007232": {
    "text": "a shorter figure jumps and high fives someone taller than him out of excitement",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1215912649980042,
      "Minus Multimodal Distance": -3.313058853149414,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8090065717697144,
      "MoBERT-F": 0.6674595411570654,
      "MoBERT-N": 0.7805336302007825,
      "MoBERT-min(F/N)": 0.6674595411570654,
      "MoBERT-max(F/N)": 0.7805336302007825,
      "MotionCritic": -6.862137317657471,
      "VeMo (human-opt view)": 2.6702167834568627e-05,
      "VeMo (max entropy view)": 4.975867044832562e-05,
      "VeMo (min entropy view)": 2.6702167834568627e-05,
      "VeMo (random view)": 2.6702167834568627e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A shorter figure jumps and high fives someone taller than him out of excitement"
  },
  "007281": {
    "text": "a person bends slightly at the shoulders and mimics the movements of picking something up with both arms moving it from their left side to their right side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3531795897926189,
      "Minus Multimodal Distance": -5.961180210113525,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8364047466311604e-05,
      "MoBERT-F": 0.3111492531756693,
      "MoBERT-N": 0.38220441510748965,
      "MoBERT-min(F/N)": 0.3111492531756693,
      "MoBERT-max(F/N)": 0.38220441510748965,
      "MotionCritic": -2.4445641040802,
      "VeMo (human-opt view)": 0.9326633165829146,
      "VeMo (max entropy view)": 0.2804733727810651,
      "VeMo (min entropy view)": 0.9326633165829146,
      "VeMo (random view)": 0.9326633165829146,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person bends slightly at the shoulders and mimics the movements of picking something up, moving it from their left side to their right side with both arms."
  },
  "007283": {
    "text": "a person lifts up their left leg while shifting on their right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0233564346459525,
      "Minus Multimodal Distance": -6.736517429351807,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.2101987017085776e-05,
      "MoBERT-F": 0.3731010926305155,
      "MoBERT-N": 0.4622388586725929,
      "MoBERT-min(F/N)": 0.3731010926305155,
      "MoBERT-max(F/N)": 0.4622388586725929,
      "MotionCritic": -1.5346707105636597,
      "VeMo (human-opt view)": 0.9097472924187726,
      "VeMo (max entropy view)": 0.9097472924187726,
      "VeMo (min entropy view)": 0.9149736644093304,
      "VeMo (random view)": 0.9149736644093304,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lifts their left leg while shifting their weight onto their right."
  },
  "007286": {
    "text": "a person stands at the edge of a cliff trying to steel themselves for a jump, but failing to jump off.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4637018404403304,
      "Minus Multimodal Distance": -3.2471508979797363,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.012844011187553406,
      "MoBERT-F": 0.6430049595303914,
      "MoBERT-N": 0.5952497507922332,
      "MoBERT-min(F/N)": 0.5952497507922332,
      "MoBERT-max(F/N)": 0.6430049595303914,
      "MotionCritic": -8.978893280029297,
      "VeMo (human-opt view)": 0.438871473354232,
      "VeMo (max entropy view)": 0.46846846846846846,
      "VeMo (min entropy view)": 0.438871473354232,
      "VeMo (random view)": 0.438871473354232,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands at the edge of a cliff, trying to steel themselves for a jump but failing to jump off."
  },
  "007301": {
    "text": "moving hands and jumping.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5472918453010922,
      "Minus Multimodal Distance": -10.586904525756836,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.12505399051588e-05,
      "MoBERT-F": 0.49668360137289697,
      "MoBERT-N": 0.5516334346948865,
      "MoBERT-min(F/N)": 0.49668360137289697,
      "MoBERT-max(F/N)": 0.5516334346948865,
      "MotionCritic": -12.138957023620605,
      "VeMo (human-opt view)": 0.024353969139245212,
      "VeMo (max entropy view)": 0.03508080409932992,
      "VeMo (min entropy view)": 0.024353969139245212,
      "VeMo (random view)": 0.03508080409932992,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving hands and jumping."
  },
  "007354": {
    "text": "he starts to crawl a lot",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.152091888938927,
      "Minus Multimodal Distance": -9.016215324401855,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.1781655251979828,
      "MoBERT-F": 0.7133789146759384,
      "MoBERT-N": 0.6771090748532546,
      "MoBERT-min(F/N)": 0.6771090748532546,
      "MoBERT-max(F/N)": 0.7133789146759384,
      "MotionCritic": -8.09603500366211,
      "VeMo (human-opt view)": 0.8174904942965779,
      "VeMo (max entropy view)": 0.7544783983140148,
      "VeMo (min entropy view)": 0.8174904942965779,
      "VeMo (random view)": 0.8174904942965779,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person starts to crawl a lot."
  },
  "007355": {
    "text": "the person extending their left leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4622884907419722,
      "Minus Multimodal Distance": -9.824286460876465,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.204836684744805e-05,
      "MoBERT-F": 0.5475448515839634,
      "MoBERT-N": 0.5332921042559462,
      "MoBERT-min(F/N)": 0.5332921042559462,
      "MoBERT-max(F/N)": 0.5475448515839634,
      "MotionCritic": -9.391007423400879,
      "VeMo (human-opt view)": 0.8597748208802457,
      "VeMo (max entropy view)": 0.8597748208802457,
      "VeMo (min entropy view)": 0.8670309653916212,
      "VeMo (random view)": 0.8670309653916212,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is extending their left leg."
  },
  "007409": {
    "text": "the person is trying to hit a bug.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.033269665067836,
      "Minus Multimodal Distance": -9.114825248718262,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.014324952848255634,
      "MoBERT-F": 0.675309763989652,
      "MoBERT-N": 0.6212022872766084,
      "MoBERT-min(F/N)": 0.6212022872766084,
      "MoBERT-max(F/N)": 0.675309763989652,
      "MotionCritic": -6.807603359222412,
      "VeMo (human-opt view)": 0.19160104986876642,
      "VeMo (max entropy view)": 0.46808510638297873,
      "VeMo (min entropy view)": 0.19160104986876642,
      "VeMo (random view)": 0.19160104986876642,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is trying to hit a bug."
  },
  "007423": {
    "text": "a figure seems to be gesturing for people to run past or through or under their other arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.44336215983651,
      "Minus Multimodal Distance": -4.471561908721924,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.24831490218639374,
      "MoBERT-F": 0.7108835404124649,
      "MoBERT-N": 0.6399223258559327,
      "MoBERT-min(F/N)": 0.6399223258559327,
      "MoBERT-max(F/N)": 0.7108835404124649,
      "MotionCritic": -5.798581123352051,
      "VeMo (human-opt view)": 0.02599873176918199,
      "VeMo (max entropy view)": 0.08021390374331551,
      "VeMo (min entropy view)": 0.02599873176918199,
      "VeMo (random view)": 0.08021390374331551,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure seems to be gesturing for people to run past, through, or under their other arm."
  },
  "007445": {
    "text": "a person does arm curls",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5406719542657871,
      "Minus Multimodal Distance": -8.43954849243164,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.6436398659134284e-05,
      "MoBERT-F": 0.33210942957755385,
      "MoBERT-N": 0.45441707301714135,
      "MoBERT-min(F/N)": 0.33210942957755385,
      "MoBERT-max(F/N)": 0.45441707301714135,
      "MotionCritic": -4.181681156158447,
      "VeMo (human-opt view)": 0.006695069993913573,
      "VeMo (max entropy view)": 0.006695069993913573,
      "VeMo (min entropy view)": 0.004340953002283391,
      "VeMo (random view)": 0.006695069993913573,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does arm curls."
  },
  "007450": {
    "text": "the person had his hands up in his rocking back-and-forth.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9132330498537307,
      "Minus Multimodal Distance": -8.25286865234375,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6325937142246403e-05,
      "MoBERT-F": 0.40905716817941407,
      "MoBERT-N": 0.49451825109747577,
      "MoBERT-min(F/N)": 0.40905716817941407,
      "MoBERT-max(F/N)": 0.49451825109747577,
      "MotionCritic": -0.21789780259132385,
      "VeMo (human-opt view)": 0.6510538641686182,
      "VeMo (max entropy view)": 0.562874251497006,
      "VeMo (min entropy view)": 0.6510538641686182,
      "VeMo (random view)": 0.6510538641686182,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person had his hands up as he was rocking back and forth."
  },
  "007451": {
    "text": "raising arm towards chest while standing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.574425079384782,
      "Minus Multimodal Distance": -12.628642082214355,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.823849172273185e-05,
      "MoBERT-F": 0.3563483995301189,
      "MoBERT-N": 0.4603187901102403,
      "MoBERT-min(F/N)": 0.3563483995301189,
      "MoBERT-max(F/N)": 0.4603187901102403,
      "MotionCritic": -8.126627922058105,
      "VeMo (human-opt view)": 0.9601820250284414,
      "VeMo (max entropy view)": 0.717741935483871,
      "VeMo (min entropy view)": 0.9601820250284414,
      "VeMo (random view)": 0.717741935483871,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises an arm towards the chest while standing."
  },
  "007473": {
    "text": "a person throwing a punch upwards similar to a uppercut motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4245270314690681,
      "Minus Multimodal Distance": -9.898324966430664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.571238837845158e-05,
      "MoBERT-F": 0.3800327560740805,
      "MoBERT-N": 0.4558745301226038,
      "MoBERT-min(F/N)": 0.3800327560740805,
      "MoBERT-max(F/N)": 0.4558745301226038,
      "MotionCritic": -9.049829483032227,
      "VeMo (human-opt view)": 0.3079754601226994,
      "VeMo (max entropy view)": 0.3079754601226994,
      "VeMo (min entropy view)": 0.30758620689655175,
      "VeMo (random view)": 0.3079754601226994,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person throwing a punch upwards, similar to an uppercut motion."
  },
  "007513": {
    "text": "a person swings their right arm over their head as if they were spiking a volleyball.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1258227660786322,
      "Minus Multimodal Distance": -7.929458141326904,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5363733584526926e-05,
      "MoBERT-F": 0.44164151226981874,
      "MoBERT-N": 0.5157001013887927,
      "MoBERT-min(F/N)": 0.44164151226981874,
      "MoBERT-max(F/N)": 0.5157001013887927,
      "MotionCritic": -13.236577987670898,
      "VeMo (human-opt view)": 0.0003676889566449565,
      "VeMo (max entropy view)": 0.0007583530356425926,
      "VeMo (min entropy view)": 0.0003676889566449565,
      "VeMo (random view)": 0.0007583530356425926,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person swings their right arm over their head as if they were spiking a volleyball."
  },
  "007516": {
    "text": "standing on one leg and hopping.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2758262410543353,
      "Minus Multimodal Distance": -12.640785217285156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3103026251192205e-05,
      "MoBERT-F": 0.4397411025489516,
      "MoBERT-N": 0.6040117148951778,
      "MoBERT-min(F/N)": 0.4397411025489516,
      "MoBERT-max(F/N)": 0.6040117148951778,
      "MotionCritic": -4.343078136444092,
      "VeMo (human-opt view)": 0.0005526875903996989,
      "VeMo (max entropy view)": 0.0005526875903996989,
      "VeMo (min entropy view)": 0.00046131320492334845,
      "VeMo (random view)": 0.0005526875903996989,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing on one leg and hopping."
  },
  "007543": {
    "text": "a person slightly bent over with left hand pressing against the air walks forward slowly",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3061997206025187,
      "Minus Multimodal Distance": -10.665657997131348,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.2019433749374e-05,
      "MoBERT-F": 0.312537037449763,
      "MoBERT-N": 0.4718643190197387,
      "MoBERT-min(F/N)": 0.312537037449763,
      "MoBERT-max(F/N)": 0.4718643190197387,
      "MotionCritic": -9.630167007446289,
      "VeMo (human-opt view)": 0.7655913978494624,
      "VeMo (max entropy view)": 0.7655913978494624,
      "VeMo (min entropy view)": 0.7774538386783285,
      "VeMo (random view)": 0.7655913978494624,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person, slightly bent over with the left hand pressing against the air, walks forward slowly."
  },
  "007550": {
    "text": "a man climbs up steps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2742261382867541,
      "Minus Multimodal Distance": -15.174820899963379,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.279036743857432e-05,
      "MoBERT-F": 0.5051216271665473,
      "MoBERT-N": 0.5851692152542656,
      "MoBERT-min(F/N)": 0.5051216271665473,
      "MoBERT-max(F/N)": 0.5851692152542656,
      "MotionCritic": -8.325732231140137,
      "VeMo (human-opt view)": 9.94857672651697e-05,
      "VeMo (max entropy view)": 0.0002612653493392737,
      "VeMo (min entropy view)": 9.94857672651697e-05,
      "VeMo (random view)": 0.0002612653493392737,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man climbs up the steps."
  },
  "007551": {
    "text": "a figure seems to jog on a treadmill",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4960493651956501,
      "Minus Multimodal Distance": -8.483447074890137,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.2640076824463904e-05,
      "MoBERT-F": 0.4681241384523006,
      "MoBERT-N": 0.5233178402771217,
      "MoBERT-min(F/N)": 0.4681241384523006,
      "MoBERT-max(F/N)": 0.5233178402771217,
      "MotionCritic": -7.295976161956787,
      "VeMo (human-opt view)": 0.12568306010928962,
      "VeMo (max entropy view)": 0.12568306010928962,
      "VeMo (min entropy view)": 0.060240963855421686,
      "VeMo (random view)": 0.060240963855421686,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure seems to be jogging on a treadmill."
  },
  "007556": {
    "text": "sits down than brushes off knees",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.371391453653285,
      "Minus Multimodal Distance": -10.809569358825684,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.670512731128838e-05,
      "MoBERT-F": 0.451815158817766,
      "MoBERT-N": 0.4768807074432618,
      "MoBERT-min(F/N)": 0.451815158817766,
      "MoBERT-max(F/N)": 0.4768807074432618,
      "MotionCritic": -11.930549621582031,
      "VeMo (human-opt view)": 0.7983539094650206,
      "VeMo (max entropy view)": 0.7657754010695187,
      "VeMo (min entropy view)": 0.7983539094650206,
      "VeMo (random view)": 0.7657754010695187,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down and then brushes off knees."
  },
  "007559": {
    "text": "a person is walking, turns back and to their left, proceeds to walk again, trips, then turns back once more, limping now.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.3008563066149237,
      "Minus Multimodal Distance": -9.158079147338867,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.02959183044731617,
      "MoBERT-F": 0.5778274795863291,
      "MoBERT-N": 0.5041552065724563,
      "MoBERT-min(F/N)": 0.5041552065724563,
      "MoBERT-max(F/N)": 0.5778274795863291,
      "MotionCritic": -10.175856590270996,
      "VeMo (human-opt view)": 0.9096744612563045,
      "VeMo (max entropy view)": 0.874430709173715,
      "VeMo (min entropy view)": 0.9096744612563045,
      "VeMo (random view)": 0.9096744612563045,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking, turns back and to their left, proceeds to walk again, trips, then turns back once more, now limping."
  },
  "007585": {
    "text": "a person who is standing with his hands by his sides takes two steps forward, angles to his left as he continues walking and then stops.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3410245153161635,
      "Minus Multimodal Distance": -10.731795310974121,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2978267224971205e-05,
      "MoBERT-F": 0.44075930320226087,
      "MoBERT-N": 0.6091288684294358,
      "MoBERT-min(F/N)": 0.44075930320226087,
      "MoBERT-max(F/N)": 0.6091288684294358,
      "MotionCritic": -21.948137283325195,
      "VeMo (human-opt view)": 0.9830425165888425,
      "VeMo (max entropy view)": 0.9783783783783784,
      "VeMo (min entropy view)": 0.9830425165888425,
      "VeMo (random view)": 0.9830425165888425,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who is standing with their hands by their sides takes two steps forward, angles to their left as they continue walking, and then stops."
  },
  "007597": {
    "text": "the figure raises its left arm and makes reaching motions as if grabbing for something, and then extends its arms twice.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5783282808899619,
      "Minus Multimodal Distance": -8.0411958694458,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7414769647293724e-05,
      "MoBERT-F": 0.47367308260582125,
      "MoBERT-N": 0.5135210177168745,
      "MoBERT-min(F/N)": 0.47367308260582125,
      "MoBERT-max(F/N)": 0.5135210177168745,
      "MotionCritic": -6.200274467468262,
      "VeMo (human-opt view)": 0.9147072383586906,
      "VeMo (max entropy view)": 0.9147072383586906,
      "VeMo (min entropy view)": 0.936198347107438,
      "VeMo (random view)": 0.9147072383586906,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The figure raises its left arm and makes reaching motions as if grabbing for something, and then extends its arms twice."
  },
  "007598": {
    "text": "a person walks in a counter clockwise circle and turns around to look after possibly tripping over something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.084184804519547,
      "Minus Multimodal Distance": -8.527973175048828,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.696671046782285e-05,
      "MoBERT-F": 0.435232425911498,
      "MoBERT-N": 0.5178677909222609,
      "MoBERT-min(F/N)": 0.435232425911498,
      "MoBERT-max(F/N)": 0.5178677909222609,
      "MotionCritic": -10.175856590270996,
      "VeMo (human-opt view)": 0.8992700729927007,
      "VeMo (max entropy view)": 0.8523908523908524,
      "VeMo (min entropy view)": 0.8992700729927007,
      "VeMo (random view)": 0.8523908523908524,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in a counter - clockwise circle and turns around to look after possibly tripping over something."
  },
  "007617": {
    "text": "the person standing still with his arms crossed.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4852170556104164,
      "Minus Multimodal Distance": -11.00969123840332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3020927983452566e-05,
      "MoBERT-F": 0.3542868517710923,
      "MoBERT-N": 0.5010357650852155,
      "MoBERT-min(F/N)": 0.3542868517710923,
      "MoBERT-max(F/N)": 0.5010357650852155,
      "MotionCritic": -4.76655912399292,
      "VeMo (human-opt view)": 0.9783869267264101,
      "VeMo (max entropy view)": 0.1831357048748353,
      "VeMo (min entropy view)": 0.9783869267264101,
      "VeMo (random view)": 0.1831357048748353,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing still with his arms crossed."
  },
  "007625": {
    "text": "a person who is prone pushes himself up off the ground using his arms and propping himself with his knees before standing awkwardly in a ready to wrestle position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9522783280771367,
      "Minus Multimodal Distance": -7.179359436035156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0002449538733344525,
      "MoBERT-F": 0.43724880662497423,
      "MoBERT-N": 0.3805683138620529,
      "MoBERT-min(F/N)": 0.3805683138620529,
      "MoBERT-max(F/N)": 0.43724880662497423,
      "MotionCritic": -7.586302280426025,
      "VeMo (human-opt view)": 0.8934269304403318,
      "VeMo (max entropy view)": 0.8934269304403318,
      "VeMo (min entropy view)": 0.9281553398058252,
      "VeMo (random view)": 0.8934269304403318,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who is prone pushes himself up off the ground using his arms and props himself on his knees before standing awkwardly in a ready - to - wrestle position."
  },
  "007626": {
    "text": "a  man stands on the ground ,walks anticlockwise and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.329667673896279,
      "Minus Multimodal Distance": -8.751932144165039,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.4155280445702374e-05,
      "MoBERT-F": 0.4329128804909271,
      "MoBERT-N": 0.5224598516120693,
      "MoBERT-min(F/N)": 0.4329128804909271,
      "MoBERT-max(F/N)": 0.5224598516120693,
      "MotionCritic": -10.564567565917969,
      "VeMo (human-opt view)": 0.9049773755656109,
      "VeMo (max entropy view)": 0.9045362220717671,
      "VeMo (min entropy view)": 0.9049773755656109,
      "VeMo (random view)": 0.9045362220717671,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man stands on the ground, walks anticlockwise, and then stops."
  },
  "007628": {
    "text": "person moves forward two steps then does a full turn and faces opposite direction",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0278546859487125,
      "Minus Multimodal Distance": -9.29483413696289,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.608363502076827e-05,
      "MoBERT-F": 0.40133108956267194,
      "MoBERT-N": 0.6058763206580371,
      "MoBERT-min(F/N)": 0.40133108956267194,
      "MoBERT-max(F/N)": 0.6058763206580371,
      "MotionCritic": -12.78177261352539,
      "VeMo (human-opt view)": 0.9554615576013934,
      "VeMo (max entropy view)": 0.7883211678832117,
      "VeMo (min entropy view)": 0.9554615576013934,
      "VeMo (random view)": 0.7883211678832117,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves forward two steps, then does a full turn and faces the opposite direction."
  },
  "007630": {
    "text": "person is holding his head because he feels dizzy.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2191583891571038,
      "Minus Multimodal Distance": -9.50883960723877,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.4981159842573106e-05,
      "MoBERT-F": 0.42538920127846463,
      "MoBERT-N": 0.507393776717282,
      "MoBERT-min(F/N)": 0.42538920127846463,
      "MoBERT-max(F/N)": 0.507393776717282,
      "MotionCritic": -7.507970333099365,
      "VeMo (human-opt view)": 0.8522727272727273,
      "VeMo (max entropy view)": 0.8522727272727273,
      "VeMo (min entropy view)": 0.966900702106319,
      "VeMo (random view)": 0.8522727272727273,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is holding his head because he feels dizzy."
  },
  "007644": {
    "text": "a person walks in a left diagonal then stops with hands slightly raised.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.18677111307186,
      "Minus Multimodal Distance": -8.511406898498535,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3093822164810263e-05,
      "MoBERT-F": 0.45453064270249,
      "MoBERT-N": 0.4863266065627036,
      "MoBERT-min(F/N)": 0.45453064270249,
      "MoBERT-max(F/N)": 0.4863266065627036,
      "MotionCritic": -5.5062947273254395,
      "VeMo (human-opt view)": 0.8668866886688669,
      "VeMo (max entropy view)": 0.8668866886688669,
      "VeMo (min entropy view)": 0.909952606635071,
      "VeMo (random view)": 0.8668866886688669,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks diagonally to the left and then stops with their hands slightly raised."
  },
  "007655": {
    "text": "figure appears to be fighting or dancing",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6057547026454466,
      "Minus Multimodal Distance": -9.234535217285156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.012415596283972263,
      "MoBERT-F": 0.6315956159425418,
      "MoBERT-N": 0.6795224997318987,
      "MoBERT-min(F/N)": 0.6315956159425418,
      "MoBERT-max(F/N)": 0.6795224997318987,
      "MotionCritic": -4.060165882110596,
      "VeMo (human-opt view)": 0.9243466299862448,
      "VeMo (max entropy view)": 0.9243466299862448,
      "VeMo (min entropy view)": 0.9464344396976668,
      "VeMo (random view)": 0.9464344396976668,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure appears to be either fighting or dancing."
  },
  "007661": {
    "text": "a person moves their arms in a strange manner.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.8340268713063477,
      "Minus Multimodal Distance": -12.140758514404297,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.751352025778033e-05,
      "MoBERT-F": 0.3929483665690606,
      "MoBERT-N": 0.3436090594714797,
      "MoBERT-min(F/N)": 0.3436090594714797,
      "MoBERT-max(F/N)": 0.3929483665690606,
      "MotionCritic": -10.312934875488281,
      "VeMo (human-opt view)": 0.8174273858921162,
      "VeMo (max entropy view)": 0.8174273858921162,
      "VeMo (min entropy view)": 0.8737201365187713,
      "VeMo (random view)": 0.8737201365187713,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves his or her arms in a strange manner."
  },
  "007666": {
    "text": "the person lifts his right hand up and puts it back down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6062218229587575,
      "Minus Multimodal Distance": -6.668400287628174,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.352235216880217e-05,
      "MoBERT-F": 0.3292311363737668,
      "MoBERT-N": 0.5042445073572647,
      "MoBERT-min(F/N)": 0.3292311363737668,
      "MoBERT-max(F/N)": 0.5042445073572647,
      "MotionCritic": -4.590999126434326,
      "VeMo (human-opt view)": 0.9045521292217328,
      "VeMo (max entropy view)": 0.9045521292217328,
      "VeMo (min entropy view)": 0.9850317124735729,
      "VeMo (random view)": 0.9045521292217328,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person lifts his right hand up and puts it back down."
  },
  "007675": {
    "text": "person is in t stance, brings arms in, then returns to t",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9720652984521668,
      "Minus Multimodal Distance": -5.675596237182617,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 9.557564044371247e-05,
      "MoBERT-F": 0.41052072704623904,
      "MoBERT-N": 0.4882820308771986,
      "MoBERT-min(F/N)": 0.41052072704623904,
      "MoBERT-max(F/N)": 0.4882820308771986,
      "MotionCritic": -15.914665222167969,
      "VeMo (human-opt view)": 0.9324452901998097,
      "VeMo (max entropy view)": 0.8936859147328656,
      "VeMo (min entropy view)": 0.9324452901998097,
      "VeMo (random view)": 0.9324452901998097,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is in a T stance, brings the arms in, then returns to the T stance."
  },
  "007676": {
    "text": "a person in a defensive pose leans right then left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7789845799245786,
      "Minus Multimodal Distance": -5.219709873199463,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3928483642521314e-05,
      "MoBERT-F": 0.39917232060309704,
      "MoBERT-N": 0.5416142095725126,
      "MoBERT-min(F/N)": 0.39917232060309704,
      "MoBERT-max(F/N)": 0.5416142095725126,
      "MotionCritic": -2.1692581176757812,
      "VeMo (human-opt view)": 0.9399848062800709,
      "VeMo (max entropy view)": 0.9046941678520626,
      "VeMo (min entropy view)": 0.9399848062800709,
      "VeMo (random view)": 0.9399848062800709,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person in a defensive pose leans to the right and then to the left."
  },
  "007689": {
    "text": "a person walks forward and raises their arms in victory.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1476325821045597,
      "Minus Multimodal Distance": -9.485823631286621,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00012365594739094377,
      "MoBERT-F": 0.5063754011552862,
      "MoBERT-N": 0.465770099298692,
      "MoBERT-min(F/N)": 0.465770099298692,
      "MoBERT-max(F/N)": 0.5063754011552862,
      "MotionCritic": -9.494750022888184,
      "VeMo (human-opt view)": 0.9400778210116731,
      "VeMo (max entropy view)": 0.9323216995447648,
      "VeMo (min entropy view)": 0.9400778210116731,
      "VeMo (random view)": 0.9400778210116731,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and raises their arms in victory."
  },
  "007693": {
    "text": "the person claps and puts their hands down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5430106449778296,
      "Minus Multimodal Distance": -8.673648834228516,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.964890154544264e-05,
      "MoBERT-F": 0.34241308266028614,
      "MoBERT-N": 0.48250623702232176,
      "MoBERT-min(F/N)": 0.34241308266028614,
      "MoBERT-max(F/N)": 0.48250623702232176,
      "MotionCritic": -5.85979700088501,
      "VeMo (human-opt view)": 0.9434650455927052,
      "VeMo (max entropy view)": 0.7310549777117384,
      "VeMo (min entropy view)": 0.9434650455927052,
      "VeMo (random view)": 0.7310549777117384,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person claps and puts their hands down."
  },
  "007742": {
    "text": "a person waves a friendly hello.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.43203729197610286,
      "Minus Multimodal Distance": -11.135831832885742,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7886182579095475e-05,
      "MoBERT-F": 0.37333624121520226,
      "MoBERT-N": 0.3593440538617353,
      "MoBERT-min(F/N)": 0.3593440538617353,
      "MoBERT-max(F/N)": 0.37333624121520226,
      "MotionCritic": -11.257108688354492,
      "VeMo (human-opt view)": 0.9964043444415613,
      "VeMo (max entropy view)": 0.9883306547411083,
      "VeMo (min entropy view)": 0.9964043444415613,
      "VeMo (random view)": 0.9964043444415613,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person waves a friendly hello."
  },
  "007772": {
    "text": "a person is using binoculars",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6315449865020692,
      "Minus Multimodal Distance": -5.660757541656494,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0133915061014704e-05,
      "MoBERT-F": 0.33090518173624406,
      "MoBERT-N": 0.4635932876668396,
      "MoBERT-min(F/N)": 0.33090518173624406,
      "MoBERT-max(F/N)": 0.4635932876668396,
      "MotionCritic": -3.4844579696655273,
      "VeMo (human-opt view)": 0.008059516429014259,
      "VeMo (max entropy view)": 0.009110219494925655,
      "VeMo (min entropy view)": 0.008059516429014259,
      "VeMo (random view)": 0.008059516429014259,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is using binoculars."
  },
  "007776": {
    "text": "the person is shaking out both her arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9346017521246381,
      "Minus Multimodal Distance": -9.723790168762207,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00012643152149394155,
      "MoBERT-F": 0.5456916779353385,
      "MoBERT-N": 0.6402816235075528,
      "MoBERT-min(F/N)": 0.5456916779353385,
      "MoBERT-max(F/N)": 0.6402816235075528,
      "MotionCritic": -7.036092758178711,
      "VeMo (human-opt view)": 0.8602150537634409,
      "VeMo (max entropy view)": 0.8171724648408586,
      "VeMo (min entropy view)": 0.8602150537634409,
      "VeMo (random view)": 0.8602150537634409,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is shaking out both her arms."
  },
  "007777": {
    "text": "a person leans their body moving their right arm above their head past their left shoulder.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.844166535004707,
      "Minus Multimodal Distance": -6.532168865203857,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.834461186779663e-05,
      "MoBERT-F": 0.42747141779176934,
      "MoBERT-N": 0.42331109738797223,
      "MoBERT-min(F/N)": 0.42331109738797223,
      "MoBERT-max(F/N)": 0.42747141779176934,
      "MotionCritic": -3.410897731781006,
      "VeMo (human-opt view)": 0.7878787878787878,
      "VeMo (max entropy view)": 0.7878787878787878,
      "VeMo (min entropy view)": 0.8352490421455939,
      "VeMo (random view)": 0.7878787878787878,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person leans their body while moving their right arm above their head and past their left shoulder."
  },
  "007779": {
    "text": "a person sits down and applauds.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5579878175921889,
      "Minus Multimodal Distance": -7.867633819580078,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2662299670628272e-05,
      "MoBERT-F": 0.3731764551286299,
      "MoBERT-N": 0.44807410098579625,
      "MoBERT-min(F/N)": 0.3731764551286299,
      "MoBERT-max(F/N)": 0.44807410098579625,
      "MotionCritic": 0.0004049771523568779,
      "VeMo (human-opt view)": 0.6654205607476635,
      "VeMo (max entropy view)": 0.6654205607476635,
      "VeMo (min entropy view)": 0.09011808576755749,
      "VeMo (random view)": 0.09011808576755749,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits down and applauds."
  },
  "007806": {
    "text": "a person walking slowly across something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1637432561575547,
      "Minus Multimodal Distance": -14.747803688049316,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9789083003997803,
      "MoBERT-F": 0.803583460019484,
      "MoBERT-N": 0.5716311826474059,
      "MoBERT-min(F/N)": 0.5716311826474059,
      "MoBERT-max(F/N)": 0.803583460019484,
      "MotionCritic": -6.369845867156982,
      "VeMo (human-opt view)": 0.982048417132216,
      "VeMo (max entropy view)": 0.982048417132216,
      "VeMo (min entropy view)": 0.9831469052945563,
      "VeMo (random view)": 0.982048417132216,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking slowly across something."
  },
  "007812": {
    "text": "a person grabbedsomething and lean it somehwere",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6456536360979754,
      "Minus Multimodal Distance": -7.454914569854736,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.237175405956805e-05,
      "MoBERT-F": 0.5010223606966755,
      "MoBERT-N": 0.546289483573521,
      "MoBERT-min(F/N)": 0.5010223606966755,
      "MoBERT-max(F/N)": 0.546289483573521,
      "MotionCritic": -4.945462226867676,
      "VeMo (human-opt view)": 0.5928338762214984,
      "VeMo (max entropy view)": 0.5928338762214984,
      "VeMo (min entropy view)": 0.7985927880386984,
      "VeMo (random view)": 0.5928338762214984,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person grabbed something and left it somewhere."
  },
  "007818": {
    "text": "a person wipes with their right hand .",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8380297262528343,
      "Minus Multimodal Distance": -10.313701629638672,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.862033478682861e-05,
      "MoBERT-F": 0.3931491626268481,
      "MoBERT-N": 0.39176272218871894,
      "MoBERT-min(F/N)": 0.39176272218871894,
      "MoBERT-max(F/N)": 0.3931491626268481,
      "MotionCritic": -10.905607223510742,
      "VeMo (human-opt view)": 0.09045226130653267,
      "VeMo (max entropy view)": 0.09045226130653267,
      "VeMo (min entropy view)": 0.0904048807542984,
      "VeMo (random view)": 0.09045226130653267,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person wipes with their right hand."
  },
  "007820": {
    "text": "a person jumps and spins 180 degrees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1196936226587106,
      "Minus Multimodal Distance": -15.76117992401123,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.763644624792505e-05,
      "MoBERT-F": 0.48868076775487157,
      "MoBERT-N": 0.5589034833348703,
      "MoBERT-min(F/N)": 0.48868076775487157,
      "MoBERT-max(F/N)": 0.5589034833348703,
      "MotionCritic": -12.065114974975586,
      "VeMo (human-opt view)": 0.04477611940298507,
      "VeMo (max entropy view)": 0.3079710144927536,
      "VeMo (min entropy view)": 0.04477611940298507,
      "VeMo (random view)": 0.04477611940298507,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person jumps and spins 180 degrees."
  },
  "007822": {
    "text": "a person jumps from side to side left to right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.8214569096673165,
      "Minus Multimodal Distance": -11.157954216003418,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.04059167206287384,
      "MoBERT-F": 0.6045718101528614,
      "MoBERT-N": 0.6209322050425107,
      "MoBERT-min(F/N)": 0.6045718101528614,
      "MoBERT-max(F/N)": 0.6209322050425107,
      "MotionCritic": -5.474212169647217,
      "VeMo (human-opt view)": 0.5928753180661578,
      "VeMo (max entropy view)": 0.5928753180661578,
      "VeMo (min entropy view)": 0.7976366322008862,
      "VeMo (random view)": 0.5928753180661578,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps from side to side, left to right."
  },
  "007862": {
    "text": "a person crosses their arms, then brings their arms back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4837726550347898,
      "Minus Multimodal Distance": -13.646480560302734,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3129789042286575e-05,
      "MoBERT-F": 0.32303834543726534,
      "MoBERT-N": 0.5267025492596984,
      "MoBERT-min(F/N)": 0.32303834543726534,
      "MoBERT-max(F/N)": 0.5267025492596984,
      "MotionCritic": -5.798643589019775,
      "VeMo (human-opt view)": 0.999594409728667,
      "VeMo (max entropy view)": 0.8174204355108877,
      "VeMo (min entropy view)": 0.999594409728667,
      "VeMo (random view)": 0.999594409728667,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person crosses their arms, then brings their arms back down."
  },
  "007878": {
    "text": "person backed up and is doing a salute",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6424435835393627,
      "Minus Multimodal Distance": -11.927521705627441,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.023772945627570152,
      "MoBERT-F": 0.5690456087198569,
      "MoBERT-N": 0.5263476323645083,
      "MoBERT-min(F/N)": 0.5263476323645083,
      "MoBERT-max(F/N)": 0.5690456087198569,
      "MotionCritic": -8.949156761169434,
      "VeMo (human-opt view)": 8.501686977601698e-05,
      "VeMo (max entropy view)": 0.0006277233801758542,
      "VeMo (min entropy view)": 8.501686977601698e-05,
      "VeMo (random view)": 8.501686977601698e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person backed up and is saluting."
  },
  "007889": {
    "text": "the body slides to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7289575226528033,
      "Minus Multimodal Distance": -11.943936347961426,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.237917396996636e-05,
      "MoBERT-F": 0.3589500767683357,
      "MoBERT-N": 0.49806690427458267,
      "MoBERT-min(F/N)": 0.3589500767683357,
      "MoBERT-max(F/N)": 0.49806690427458267,
      "MotionCritic": -2.7071449756622314,
      "VeMo (human-opt view)": 0.7046070460704607,
      "VeMo (max entropy view)": 0.7046070460704607,
      "VeMo (min entropy view)": 0.8076009501187649,
      "VeMo (random view)": 0.7046070460704607,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The body slides to the left."
  },
  "007890": {
    "text": "a person glides in circles in a counter-clockwise.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.9688266552983356,
      "Minus Multimodal Distance": -8.277274131774902,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.175079473294318e-05,
      "MoBERT-F": 0.4090591755749543,
      "MoBERT-N": 0.447992220660637,
      "MoBERT-min(F/N)": 0.4090591755749543,
      "MoBERT-max(F/N)": 0.447992220660637,
      "MotionCritic": -7.600892066955566,
      "VeMo (human-opt view)": 0.00027812938124726835,
      "VeMo (max entropy view)": 0.00103263435922184,
      "VeMo (min entropy view)": 0.00027812938124726835,
      "VeMo (random view)": 0.00103263435922184,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person glides in circles in a counter-clockwise direction."
  },
  "007941": {
    "text": "moving the left hand on stand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5165300980303212,
      "Minus Multimodal Distance": -9.182281494140625,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0002705124206840992,
      "MoBERT-F": 0.43953649578956944,
      "MoBERT-N": 0.5097639615911805,
      "MoBERT-min(F/N)": 0.43953649578956944,
      "MoBERT-max(F/N)": 0.5097639615911805,
      "MotionCritic": -1.9795074462890625,
      "VeMo (human-opt view)": 0.8742004264392325,
      "VeMo (max entropy view)": 0.8742004264392325,
      "VeMo (min entropy view)": 0.9194683346364347,
      "VeMo (random view)": 0.8742004264392325,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving the left hand while standing."
  },
  "007946": {
    "text": "a person is sitting with the chin in hand, stands up, and then returns to sitting",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.506665408241217,
      "Minus Multimodal Distance": -3.0625252723693848,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4239208869403228e-05,
      "MoBERT-F": 0.2885506870619876,
      "MoBERT-N": 0.4073419621560405,
      "MoBERT-min(F/N)": 0.2885506870619876,
      "MoBERT-max(F/N)": 0.4073419621560405,
      "MotionCritic": -5.433651924133301,
      "VeMo (human-opt view)": 0.172978505629478,
      "VeMo (max entropy view)": 0.7426556991774383,
      "VeMo (min entropy view)": 0.172978505629478,
      "VeMo (random view)": 0.7426556991774383,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting with the chin in hand, stands up, and then returns to sitting."
  },
  "007970": {
    "text": "the person was pushed but didnt fall",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9536431963341535,
      "Minus Multimodal Distance": -7.3820061683654785,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4038406991166994e-05,
      "MoBERT-F": 0.4647126428897322,
      "MoBERT-N": 0.567838698031323,
      "MoBERT-min(F/N)": 0.4647126428897322,
      "MoBERT-max(F/N)": 0.567838698031323,
      "MotionCritic": -4.329597473144531,
      "VeMo (human-opt view)": 0.9399744572158365,
      "VeMo (max entropy view)": 0.928598701794578,
      "VeMo (min entropy view)": 0.9399744572158365,
      "VeMo (random view)": 0.9399744572158365,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was pushed but didn't fall."
  },
  "007995": {
    "text": "movin  right hand upward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0386581245980429,
      "Minus Multimodal Distance": -2.1032357215881348,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.7804896894376725e-05,
      "MoBERT-F": 0.3085377172976135,
      "MoBERT-N": 0.4442362177317182,
      "MoBERT-min(F/N)": 0.3085377172976135,
      "MoBERT-max(F/N)": 0.4442362177317182,
      "MotionCritic": -15.13487434387207,
      "VeMo (human-opt view)": 0.9908416921064108,
      "VeMo (max entropy view)": 0.9902478888586216,
      "VeMo (min entropy view)": 0.9908416921064108,
      "VeMo (random view)": 0.9908416921064108,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving the right hand upward."
  },
  "007999": {
    "text": "a person doges to the left, then doges to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4366495946282456,
      "Minus Multimodal Distance": -10.763763427734375,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8773777484893799,
      "MoBERT-F": 0.6150798948391909,
      "MoBERT-N": 0.4970380939176867,
      "MoBERT-min(F/N)": 0.4970380939176867,
      "MoBERT-max(F/N)": 0.6150798948391909,
      "MotionCritic": -11.83130168914795,
      "VeMo (human-opt view)": 0.7431693989071039,
      "VeMo (max entropy view)": 0.7431693989071039,
      "VeMo (min entropy view)": 0.7774193548387097,
      "VeMo (random view)": 0.7431693989071039,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person dodges to the left, then dodges to the right."
  },
  "008002": {
    "text": "a man is standing still and then starts walking forward before turning around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1177039614983273,
      "Minus Multimodal Distance": -9.284192085266113,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.3609903261531144e-05,
      "MoBERT-F": 0.4978122424514695,
      "MoBERT-N": 0.6070928285228624,
      "MoBERT-min(F/N)": 0.4978122424514695,
      "MoBERT-max(F/N)": 0.6070928285228624,
      "MotionCritic": -6.930275917053223,
      "VeMo (human-opt view)": 0.988306703561546,
      "VeMo (max entropy view)": 0.9648550028807374,
      "VeMo (min entropy view)": 0.988306703561546,
      "VeMo (random view)": 0.988306703561546,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is standing still, and then he starts walking forward before turning around."
  },
  "008006": {
    "text": "a person does a threatening crouching walk and then raises their arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8309063832438943,
      "Minus Multimodal Distance": -11.244325637817383,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.004025283269584179,
      "MoBERT-F": 0.6937356475751555,
      "MoBERT-N": 0.5925306202838412,
      "MoBERT-min(F/N)": 0.5925306202838412,
      "MoBERT-max(F/N)": 0.6937356475751555,
      "MotionCritic": -6.299108028411865,
      "VeMo (human-opt view)": 0.30638852672750977,
      "VeMo (max entropy view)": 0.30638852672750977,
      "VeMo (min entropy view)": 0.24508050089445438,
      "VeMo (random view)": 0.24508050089445438,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does a threatening crouching walk and then raises their arms."
  },
  "008009": {
    "text": "a person picks something up with his right hand and bring it up to his face",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5586815065016657,
      "Minus Multimodal Distance": -8.278315544128418,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.275514245615341e-05,
      "MoBERT-F": 0.38287171797707187,
      "MoBERT-N": 0.4531481102814322,
      "MoBERT-min(F/N)": 0.38287171797707187,
      "MoBERT-max(F/N)": 0.4531481102814322,
      "MotionCritic": -8.3367338180542,
      "VeMo (human-opt view)": 0.7777777777777778,
      "VeMo (max entropy view)": 0.7777777777777778,
      "VeMo (min entropy view)": 0.00021682257551462904,
      "VeMo (random view)": 0.00021682257551462904,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks something up with his right hand and brings it up to his face."
  },
  "008016": {
    "text": "a man reaches over and picks up some wet soap, and washes his hands with it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8491504733942846,
      "Minus Multimodal Distance": -9.297189712524414,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.014538338407874107,
      "MoBERT-F": 0.38487107998943043,
      "MoBERT-N": 0.3937187588197282,
      "MoBERT-min(F/N)": 0.38487107998943043,
      "MoBERT-max(F/N)": 0.3937187588197282,
      "MotionCritic": -2.3483033180236816,
      "VeMo (human-opt view)": 0.02163141993957704,
      "VeMo (max entropy view)": 0.19122257053291536,
      "VeMo (min entropy view)": 0.02163141993957704,
      "VeMo (random view)": 0.02163141993957704,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man reaches over, picks up some wet soap, and washes his hands with it."
  },
  "008052": {
    "text": "a person delivers pizza, receives money, then waves goodbye",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8762609615234928,
      "Minus Multimodal Distance": -6.680098056793213,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.20649087370839e-05,
      "MoBERT-F": 0.4329496040513007,
      "MoBERT-N": 0.4313445618042915,
      "MoBERT-min(F/N)": 0.4313445618042915,
      "MoBERT-max(F/N)": 0.4329496040513007,
      "MotionCritic": -5.99695348739624,
      "VeMo (human-opt view)": 9.321624035912079e-05,
      "VeMo (max entropy view)": 0.0011705033164260631,
      "VeMo (min entropy view)": 9.321624035912079e-05,
      "VeMo (random view)": 9.321624035912079e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person delivers pizza, receives money, then waves goodbye."
  },
  "008053": {
    "text": "the person is cleaning up some thing",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.099043425988325,
      "Minus Multimodal Distance": -4.24207067489624,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.028796207159757614,
      "MoBERT-F": 0.5898515124483115,
      "MoBERT-N": 0.6132933686247694,
      "MoBERT-min(F/N)": 0.5898515124483115,
      "MoBERT-max(F/N)": 0.6132933686247694,
      "MotionCritic": -19.374298095703125,
      "VeMo (human-opt view)": 0.0003147138352055595,
      "VeMo (max entropy view)": 0.00046131320492334845,
      "VeMo (min entropy view)": 0.0003147138352055595,
      "VeMo (random view)": 0.0003147138352055595,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is cleaning up something."
  },
  "008055": {
    "text": "a figure raises their right hand in a sweeping motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6383152640222691,
      "Minus Multimodal Distance": -10.42951488494873,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.50613629759755e-05,
      "MoBERT-F": 0.3901944885700683,
      "MoBERT-N": 0.44059270987496796,
      "MoBERT-min(F/N)": 0.3901944885700683,
      "MoBERT-max(F/N)": 0.44059270987496796,
      "MotionCritic": -11.607477188110352,
      "VeMo (human-opt view)": 0.6081081081081081,
      "VeMo (max entropy view)": 0.6081081081081081,
      "VeMo (min entropy view)": 0.8736196319018404,
      "VeMo (random view)": 0.6081081081081081,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure raises their right hand in a sweeping motion."
  },
  "008057": {
    "text": "a person dancing and spinning side to side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.834752992064104,
      "Minus Multimodal Distance": -8.969496726989746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9850528836250305,
      "MoBERT-F": 0.7651157974172365,
      "MoBERT-N": 0.7063113654807186,
      "MoBERT-min(F/N)": 0.7063113654807186,
      "MoBERT-max(F/N)": 0.7651157974172365,
      "MotionCritic": -6.567636489868164,
      "VeMo (human-opt view)": 0.9047013977128335,
      "VeMo (max entropy view)": 0.9047013977128335,
      "VeMo (min entropy view)": 0.9668085106382979,
      "VeMo (random view)": 0.9047013977128335,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is dancing and spinning from side to side."
  },
  "008068": {
    "text": "the person is sitting down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3218884464299503,
      "Minus Multimodal Distance": -2.9985220432281494,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.278197229723446e-05,
      "MoBERT-F": 0.43749629264805256,
      "MoBERT-N": 0.46854469348751066,
      "MoBERT-min(F/N)": 0.43749629264805256,
      "MoBERT-max(F/N)": 0.46854469348751066,
      "MotionCritic": -8.888171195983887,
      "VeMo (human-opt view)": 0.005542659529110806,
      "VeMo (max entropy view)": 0.10697674418604651,
      "VeMo (min entropy view)": 0.005542659529110806,
      "VeMo (random view)": 0.005542659529110806,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is sitting down."
  },
  "008077": {
    "text": "a person walks a path that bends left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8497087779134903,
      "Minus Multimodal Distance": -6.867562294006348,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.30927635129774e-05,
      "MoBERT-F": 0.37796677547789276,
      "MoBERT-N": 0.5665289364955409,
      "MoBERT-min(F/N)": 0.37796677547789276,
      "MoBERT-max(F/N)": 0.5665289364955409,
      "MotionCritic": -7.850574016571045,
      "VeMo (human-opt view)": 0.8934010152284264,
      "VeMo (max entropy view)": 0.8934010152284264,
      "VeMo (min entropy view)": 0.9101194217473287,
      "VeMo (random view)": 0.8934010152284264,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks along a path that bends to the left."
  },
  "008082": {
    "text": "the person was reaching to the right to pass something to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1554601545562917,
      "Minus Multimodal Distance": -9.177204132080078,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00015942823665682226,
      "MoBERT-F": 0.5870797908106185,
      "MoBERT-N": 0.5657821552682105,
      "MoBERT-min(F/N)": 0.5657821552682105,
      "MoBERT-max(F/N)": 0.5870797908106185,
      "MotionCritic": -7.326298713684082,
      "VeMo (human-opt view)": 0.7427385892116183,
      "VeMo (max entropy view)": 0.7316017316017316,
      "VeMo (min entropy view)": 0.7427385892116183,
      "VeMo (random view)": 0.7316017316017316,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person was reaching to the right to pass something to the left."
  },
  "008084": {
    "text": "a person wlowly walked by making the circle",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3841736517059813,
      "Minus Multimodal Distance": -5.450078964233398,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.300183642655611e-05,
      "MoBERT-F": 0.582589364513328,
      "MoBERT-N": 0.6235756233495964,
      "MoBERT-min(F/N)": 0.582589364513328,
      "MoBERT-max(F/N)": 0.6235756233495964,
      "MotionCritic": -4.600835800170898,
      "VeMo (human-opt view)": 0.37714285714285717,
      "VeMo (max entropy view)": 0.37714285714285717,
      "VeMo (min entropy view)": 0.24571428571428572,
      "VeMo (random view)": 0.24571428571428572,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person slowly walked by, making a circle."
  },
  "008105": {
    "text": "a person eagerly practicing karate.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4424812463668295,
      "Minus Multimodal Distance": -2.426305055618286,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.591522105736658e-05,
      "MoBERT-F": 0.48270116866131296,
      "MoBERT-N": 0.5785491735758364,
      "MoBERT-min(F/N)": 0.48270116866131296,
      "MoBERT-max(F/N)": 0.5785491735758364,
      "MotionCritic": -5.4975810050964355,
      "VeMo (human-opt view)": 0.9433419614610297,
      "VeMo (max entropy view)": 0.8739837398373984,
      "VeMo (min entropy view)": 0.9433419614610297,
      "VeMo (random view)": 0.9433419614610297,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is eagerly practicing karate."
  },
  "008131": {
    "text": "moving there hands and do like a exericses.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2909210280788956,
      "Minus Multimodal Distance": -8.018217086791992,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.380608177394606e-05,
      "MoBERT-F": 0.4593203313687158,
      "MoBERT-N": 0.47315298313724985,
      "MoBERT-min(F/N)": 0.4593203313687158,
      "MoBERT-max(F/N)": 0.47315298313724985,
      "MotionCritic": -12.43712329864502,
      "VeMo (human-opt view)": 0.2571428571428571,
      "VeMo (max entropy view)": 0.32124352331606215,
      "VeMo (min entropy view)": 0.2571428571428571,
      "VeMo (random view)": 0.32124352331606215,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is moving their hands and doing something like exercises."
  },
  "008139": {
    "text": "the sim appears to be using their left hand to wipe or wash a window.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2025854855896927,
      "Minus Multimodal Distance": -10.88853931427002,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3013970348984003e-05,
      "MoBERT-F": 0.45241871041723486,
      "MoBERT-N": 0.47287780775636806,
      "MoBERT-min(F/N)": 0.45241871041723486,
      "MoBERT-max(F/N)": 0.47287780775636806,
      "MotionCritic": -10.314849853515625,
      "VeMo (human-opt view)": 0.001499397301084858,
      "VeMo (max entropy view)": 0.007127882599580713,
      "VeMo (min entropy view)": 0.001499397301084858,
      "VeMo (random view)": 0.007127882599580713,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The sim appears to be using their left hand to wipe or wash a window."
  },
  "008147": {
    "text": "this person swings both arms around and back to front.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.2633712204629015,
      "Minus Multimodal Distance": -10.732511520385742,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0703569791512564e-05,
      "MoBERT-F": 0.5431679892470864,
      "MoBERT-N": 0.4579111080545995,
      "MoBERT-min(F/N)": 0.4579111080545995,
      "MoBERT-max(F/N)": 0.5431679892470864,
      "MotionCritic": -8.055336952209473,
      "VeMo (human-opt view)": 0.14772727272727273,
      "VeMo (max entropy view)": 0.14772727272727273,
      "VeMo (min entropy view)": 0.0015954617975536252,
      "VeMo (random view)": 0.14772727272727273,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person swings both arms around, from back to front."
  },
  "008157": {
    "text": "a person turns to their left while leaping forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.977135849411203,
      "Minus Multimodal Distance": -9.260347366333008,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.4050849378108978,
      "MoBERT-F": 0.7716327108985703,
      "MoBERT-N": 0.7270496607082936,
      "MoBERT-min(F/N)": 0.7270496607082936,
      "MoBERT-max(F/N)": 0.7716327108985703,
      "MotionCritic": -15.422815322875977,
      "VeMo (human-opt view)": 0.7306122448979592,
      "VeMo (max entropy view)": 0.717434869739479,
      "VeMo (min entropy view)": 0.7306122448979592,
      "VeMo (random view)": 0.717434869739479,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person turns to their left while leaping forward."
  },
  "008170": {
    "text": "the person is standing on his left foot bending his right knee.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8252263053473453,
      "Minus Multimodal Distance": -9.584590911865234,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.109886165475473e-05,
      "MoBERT-F": 0.3950217336618189,
      "MoBERT-N": 0.4237894852887031,
      "MoBERT-min(F/N)": 0.3950217336618189,
      "MoBERT-max(F/N)": 0.4237894852887031,
      "MotionCritic": -6.640495777130127,
      "VeMo (human-opt view)": 0.8872987477638641,
      "VeMo (max entropy view)": 0.777947932618683,
      "VeMo (min entropy view)": 0.8872987477638641,
      "VeMo (random view)": 0.777947932618683,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing on his left foot, bending his right knee."
  },
  "008173": {
    "text": "the sim appears to push something, then brings both hands to their right ear.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6665034187573515,
      "Minus Multimodal Distance": -11.018213272094727,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.839721466647461e-05,
      "MoBERT-F": 0.49182049308603515,
      "MoBERT-N": 0.5585813057721389,
      "MoBERT-min(F/N)": 0.49182049308603515,
      "MoBERT-max(F/N)": 0.5585813057721389,
      "MotionCritic": -3.2735133171081543,
      "VeMo (human-opt view)": 0.43872113676731794,
      "VeMo (max entropy view)": 0.43872113676731794,
      "VeMo (min entropy view)": 0.6377358490566037,
      "VeMo (random view)": 0.43872113676731794,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The sim appears to push something, then brings both hands to its right ear."
  },
  "008174": {
    "text": "a person grabbed something and made some jesture",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0275844568630466,
      "Minus Multimodal Distance": -8.405547142028809,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.30849891901016235,
      "MoBERT-F": 0.6584696457895476,
      "MoBERT-N": 0.5962596429150195,
      "MoBERT-min(F/N)": 0.5962596429150195,
      "MoBERT-max(F/N)": 0.6584696457895476,
      "MotionCritic": -11.88322925567627,
      "VeMo (human-opt view)": 0.2808219178082192,
      "VeMo (max entropy view)": 0.37756714060031593,
      "VeMo (min entropy view)": 0.2808219178082192,
      "VeMo (random view)": 0.2808219178082192,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person grabbed something and made some gesture."
  },
  "008191": {
    "text": "the man puts something down and walks",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4182708590056792,
      "Minus Multimodal Distance": -3.962894916534424,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9902533888816833,
      "MoBERT-F": 0.7909281228474115,
      "MoBERT-N": 0.5667520805805706,
      "MoBERT-min(F/N)": 0.5667520805805706,
      "MoBERT-max(F/N)": 0.7909281228474115,
      "MotionCritic": -10.989009857177734,
      "VeMo (human-opt view)": 0.5923566878980892,
      "VeMo (max entropy view)": 0.45278450363196127,
      "VeMo (min entropy view)": 0.5923566878980892,
      "VeMo (random view)": 0.45278450363196127,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man puts something down and walks."
  },
  "008205": {
    "text": "both the hand holding the right leg.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.130592036669655,
      "Minus Multimodal Distance": -7.579686641693115,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.991041638888419e-05,
      "MoBERT-F": 0.37943155156465525,
      "MoBERT-N": 0.3757402547323748,
      "MoBERT-min(F/N)": 0.3757402547323748,
      "MoBERT-max(F/N)": 0.37943155156465525,
      "MotionCritic": -10.157543182373047,
      "VeMo (human-opt view)": 0.6509695290858726,
      "VeMo (max entropy view)": 0.6509695290858726,
      "VeMo (min entropy view)": 0.6929347826086957,
      "VeMo (random view)": 0.6509695290858726,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is holding the right leg with both hands."
  },
  "008208": {
    "text": "a person quickly runs straight forward, then bends down and picks up something with both hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -3.28892866695892,
      "Minus Multimodal Distance": -6.361992359161377,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.21888983307872e-05,
      "MoBERT-F": 0.3658667307926168,
      "MoBERT-N": 0.5504105958700917,
      "MoBERT-min(F/N)": 0.3658667307926168,
      "MoBERT-max(F/N)": 0.5504105958700917,
      "MotionCritic": -11.847949981689453,
      "VeMo (human-opt view)": 0.6797202797202797,
      "VeMo (max entropy view)": 0.6797202797202797,
      "VeMo (min entropy view)": 0.25666666666666665,
      "VeMo (random view)": 0.6797202797202797,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person quickly runs straight forward, then bends down and picks up something with both hands."
  },
  "008216": {
    "text": "a man jumps down from a curve.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0130433174349713,
      "Minus Multimodal Distance": -6.415754318237305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4265087631647475e-05,
      "MoBERT-F": 0.4634475317147303,
      "MoBERT-N": 0.5879888534456599,
      "MoBERT-min(F/N)": 0.4634475317147303,
      "MoBERT-max(F/N)": 0.5879888534456599,
      "MotionCritic": -2.67226505279541,
      "VeMo (human-opt view)": 1.3411492621229654e-05,
      "VeMo (max entropy view)": 1.6759372679170825e-05,
      "VeMo (min entropy view)": 1.3411492621229654e-05,
      "VeMo (random view)": 1.3411492621229654e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man jumps down from a curb."
  },
  "008227": {
    "text": "a person jumps and stretches out her legs and arms to the sides, pauses, and then performs jumping jacks.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3826087486224319,
      "Minus Multimodal Distance": -4.954286575317383,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8488041162490845,
      "MoBERT-F": 0.7961039544415874,
      "MoBERT-N": 0.777073980121909,
      "MoBERT-min(F/N)": 0.777073980121909,
      "MoBERT-max(F/N)": 0.7961039544415874,
      "MotionCritic": -5.0565385818481445,
      "VeMo (human-opt view)": 0.5928338762214984,
      "VeMo (max entropy view)": 0.53125,
      "VeMo (min entropy view)": 0.5928338762214984,
      "VeMo (random view)": 0.5928338762214984,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps, stretches out their legs and arms to the sides, pauses, and then does jumping jacks."
  },
  "008232": {
    "text": "a man pets a large dog.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8739886032028032,
      "Minus Multimodal Distance": -12.182034492492676,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.08604990690946579,
      "MoBERT-F": 0.5693176836032512,
      "MoBERT-N": 0.5190602081301827,
      "MoBERT-min(F/N)": 0.5190602081301827,
      "MoBERT-max(F/N)": 0.5693176836032512,
      "MotionCritic": -7.748357772827148,
      "VeMo (human-opt view)": 4.223396992641617e-06,
      "VeMo (max entropy view)": 4.944953484235418e-06,
      "VeMo (min entropy view)": 4.223396992641617e-06,
      "VeMo (random view)": 4.223396992641617e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man pets a large dog."
  },
  "008235": {
    "text": "a person jumps into the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.073169845941241,
      "Minus Multimodal Distance": -13.966024398803711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.32496920228004456,
      "MoBERT-F": 0.722743352006914,
      "MoBERT-N": 0.6546335518831581,
      "MoBERT-min(F/N)": 0.6546335518831581,
      "MoBERT-max(F/N)": 0.722743352006914,
      "MotionCritic": -3.07838773727417,
      "VeMo (human-opt view)": 0.19172932330827067,
      "VeMo (max entropy view)": 0.19172932330827067,
      "VeMo (min entropy view)": 0.00180295201042089,
      "VeMo (random view)": 0.19172932330827067,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps into the air."
  },
  "008266": {
    "text": "walking to the side then the other.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4349715783029806,
      "Minus Multimodal Distance": -6.354811668395996,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.078774898312986e-05,
      "MoBERT-F": 0.46908103001469986,
      "MoBERT-N": 0.5991359844782218,
      "MoBERT-min(F/N)": 0.46908103001469986,
      "MoBERT-max(F/N)": 0.5991359844782218,
      "MotionCritic": -3.88057279586792,
      "VeMo (human-opt view)": 0.98086813412878,
      "VeMo (max entropy view)": 0.95276266819353,
      "VeMo (min entropy view)": 0.98086813412878,
      "VeMo (random view)": 0.95276266819353,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks to one side then the other."
  },
  "008296": {
    "text": "the man dances his feet in circles in front of himself.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7456337902263286,
      "Minus Multimodal Distance": -9.015400886535645,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3016165869194083e-05,
      "MoBERT-F": 0.39885836750723574,
      "MoBERT-N": 0.5488807461265315,
      "MoBERT-min(F/N)": 0.39885836750723574,
      "MoBERT-max(F/N)": 0.5488807461265315,
      "MotionCritic": -10.981502532958984,
      "VeMo (human-opt view)": 1.2222064743996708e-05,
      "VeMo (max entropy view)": 0.00042983558095745184,
      "VeMo (min entropy view)": 1.2222064743996708e-05,
      "VeMo (random view)": 0.00042983558095745184,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man dances with his feet in circles in front of himself."
  },
  "008310": {
    "text": "person is walking backwards with their toes bent .",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0489744547612754,
      "Minus Multimodal Distance": -7.570670127868652,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.450395792722702,
      "MoBERT-F": 0.5897619147182862,
      "MoBERT-N": 0.46739043381191514,
      "MoBERT-min(F/N)": 0.46739043381191514,
      "MoBERT-max(F/N)": 0.5897619147182862,
      "MotionCritic": -17.216678619384766,
      "VeMo (human-opt view)": 0.7554179566563467,
      "VeMo (max entropy view)": 0.7554179566563467,
      "VeMo (min entropy view)": 0.8873949579831932,
      "VeMo (random view)": 0.7554179566563467,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking backwards with their toes bent."
  },
  "008312": {
    "text": "the person walks across the area, picks something up and then starts to move its arm as if to make a scrubbing motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4254695668951445,
      "Minus Multimodal Distance": -8.401330947875977,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0021224915981292725,
      "MoBERT-F": 0.4583345721450664,
      "MoBERT-N": 0.39803178285664687,
      "MoBERT-min(F/N)": 0.39803178285664687,
      "MoBERT-max(F/N)": 0.4583345721450664,
      "MotionCritic": -8.531073570251465,
      "VeMo (human-opt view)": 0.7984570877531341,
      "VeMo (max entropy view)": 0.7879924953095685,
      "VeMo (min entropy view)": 0.7984570877531341,
      "VeMo (random view)": 0.7984570877531341,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walks across the area, picks something up, and then starts to move their arm as if to make a scrubbing motion."
  },
  "008315": {
    "text": "he is self balancing while walking through a narrow bridge.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1914423540569576,
      "Minus Multimodal Distance": -11.05182933807373,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.005185229238122702,
      "MoBERT-F": 0.672574396044243,
      "MoBERT-N": 0.5714355266577994,
      "MoBERT-min(F/N)": 0.5714355266577994,
      "MoBERT-max(F/N)": 0.672574396044243,
      "MotionCritic": -9.785626411437988,
      "VeMo (human-opt view)": 0.008059516429014259,
      "VeMo (max entropy view)": 0.008059516429014259,
      "VeMo (min entropy view)": 0.0063019453831400125,
      "VeMo (random view)": 0.008059516429014259,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is self - balancing while walking across a narrow bridge."
  },
  "008317": {
    "text": "a person walks straight and turns to the right",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1783560956777823,
      "Minus Multimodal Distance": -6.676339626312256,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.352666706428863e-05,
      "MoBERT-F": 0.48842181703592086,
      "MoBERT-N": 0.5479351749121999,
      "MoBERT-min(F/N)": 0.48842181703592086,
      "MoBERT-max(F/N)": 0.5479351749121999,
      "MotionCritic": -5.007429122924805,
      "VeMo (human-opt view)": 0.044656297039638734,
      "VeMo (max entropy view)": 0.13286713286713286,
      "VeMo (min entropy view)": 0.044656297039638734,
      "VeMo (random view)": 0.13286713286713286,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks straight and turns to the right."
  },
  "008320": {
    "text": "a person while standing still raises his left hand up and down and turns to his left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6554777189073179,
      "Minus Multimodal Distance": -8.2163667678833,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3593307560076937e-05,
      "MoBERT-F": 0.33114908936079845,
      "MoBERT-N": 0.4905926424631475,
      "MoBERT-min(F/N)": 0.33114908936079845,
      "MoBERT-max(F/N)": 0.4905926424631475,
      "MotionCritic": -8.13676643371582,
      "VeMo (human-opt view)": 0.7666034155597723,
      "VeMo (max entropy view)": 0.7666034155597723,
      "VeMo (min entropy view)": 0.8871595330739299,
      "VeMo (random view)": 0.7666034155597723,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person, while standing still, raises his left hand up and down and turns to his left."
  },
  "008343": {
    "text": "a standing man leans down to a kneeled position with his left knee contacting the ground and his right leg planted foot down. the man then stands up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5042288787660356,
      "Minus Multimodal Distance": -10.553614616394043,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0151193641358986e-05,
      "MoBERT-F": 0.3387642523032475,
      "MoBERT-N": 0.5520940392617857,
      "MoBERT-min(F/N)": 0.3387642523032475,
      "MoBERT-max(F/N)": 0.5520940392617857,
      "MotionCritic": -1.9366377592086792,
      "VeMo (human-opt view)": 0.0008566232552599875,
      "VeMo (max entropy view)": 0.006691039525419052,
      "VeMo (min entropy view)": 0.0008566232552599875,
      "VeMo (random view)": 0.006691039525419052,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A standing man leans down into a kneeling position with his left knee touching the ground and his right leg planted with the foot down. The man then stands up."
  },
  "008354": {
    "text": "a person lightly kicks an object on the ground",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1214862971305204,
      "Minus Multimodal Distance": -7.543605327606201,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.500864866306074e-05,
      "MoBERT-F": 0.35168685608643285,
      "MoBERT-N": 0.5032496741165291,
      "MoBERT-min(F/N)": 0.35168685608643285,
      "MoBERT-max(F/N)": 0.5032496741165291,
      "MotionCritic": -11.052396774291992,
      "VeMo (human-opt view)": 0.6216216216216216,
      "VeMo (max entropy view)": 0.6216216216216216,
      "VeMo (min entropy view)": 0.6219239373601789,
      "VeMo (random view)": 0.6219239373601789,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lightly kicks an object on the ground."
  },
  "008357": {
    "text": "raising hands above head and stretching.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7252244139353,
      "Minus Multimodal Distance": -14.253866195678711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.351550913066603e-05,
      "MoBERT-F": 0.444234769708337,
      "MoBERT-N": 0.49169572823101865,
      "MoBERT-min(F/N)": 0.444234769708337,
      "MoBERT-max(F/N)": 0.49169572823101865,
      "MotionCritic": -8.327351570129395,
      "VeMo (human-opt view)": 0.9992909693897,
      "VeMo (max entropy view)": 0.9991462373460178,
      "VeMo (min entropy view)": 0.9992909693897,
      "VeMo (random view)": 0.9992909693897,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is raising hands above the head and stretching."
  },
  "008378": {
    "text": "a man climbs up and seems to be putting something away with both hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4716730279077683,
      "Minus Multimodal Distance": -5.4166388511657715,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0005887445295229554,
      "MoBERT-F": 0.6238804714935346,
      "MoBERT-N": 0.5602887921146349,
      "MoBERT-min(F/N)": 0.5602887921146349,
      "MoBERT-max(F/N)": 0.6238804714935346,
      "MotionCritic": -10.884014129638672,
      "VeMo (human-opt view)": 0.001245879697856375,
      "VeMo (max entropy view)": 0.0026354990260112295,
      "VeMo (min entropy view)": 0.001245879697856375,
      "VeMo (random view)": 0.001245879697856375,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man climbs up and seems to be putting something away with both hands."
  },
  "008388": {
    "text": "[from 0s to 5s] a person performs a ballerina balance pose, first on their left leg, before centralising and swapping to their right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4330124187102293,
      "Minus Multimodal Distance": -9.45679759979248,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2541144062415697e-05,
      "MoBERT-F": 0.4286039152575959,
      "MoBERT-N": 0.49830795809478484,
      "MoBERT-min(F/N)": 0.4286039152575959,
      "MoBERT-max(F/N)": 0.49830795809478484,
      "MotionCritic": -9.992589950561523,
      "VeMo (human-opt view)": 0.06782608695652174,
      "VeMo (max entropy view)": 0.06782608695652174,
      "VeMo (min entropy view)": 0.015889287544848796,
      "VeMo (random view)": 0.06782608695652174,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person performs a ballerina balance pose, first on their left leg, before centralizing and swapping to their right."
  },
  "008399": {
    "text": "shaking legs side to side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1808282886623214,
      "Minus Multimodal Distance": -8.812285423278809,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4631735868752003e-05,
      "MoBERT-F": 0.4982858317956024,
      "MoBERT-N": 0.5913593132179253,
      "MoBERT-min(F/N)": 0.4982858317956024,
      "MoBERT-max(F/N)": 0.5913593132179253,
      "MotionCritic": -4.367377758026123,
      "VeMo (human-opt view)": 0.001324088443413016,
      "VeMo (max entropy view)": 0.021549213744903904,
      "VeMo (min entropy view)": 0.001324088443413016,
      "VeMo (random view)": 0.001324088443413016,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is shaking legs side to side."
  },
  "008401": {
    "text": "person squats then rotates a quarter of the way clockwise.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.22837654762352,
      "Minus Multimodal Distance": -8.56029987335205,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2288228137767874e-05,
      "MoBERT-F": 0.3273165453571396,
      "MoBERT-N": 0.4027471682325396,
      "MoBERT-min(F/N)": 0.3273165453571396,
      "MoBERT-max(F/N)": 0.4027471682325396,
      "MotionCritic": -7.128378391265869,
      "VeMo (human-opt view)": 0.7975708502024291,
      "VeMo (max entropy view)": 0.7975708502024291,
      "VeMo (min entropy view)": 0.8877146631439894,
      "VeMo (random view)": 0.7975708502024291,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats then rotates a quarter of the way clockwise."
  },
  "008431": {
    "text": "the person is practing balancing on one leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2474749412213915,
      "Minus Multimodal Distance": -10.530905723571777,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.234679959656205e-05,
      "MoBERT-F": 0.4191328027661373,
      "MoBERT-N": 0.5246774423407148,
      "MoBERT-min(F/N)": 0.4191328027661373,
      "MoBERT-max(F/N)": 0.5246774423407148,
      "MotionCritic": -7.077602386474609,
      "VeMo (human-opt view)": 0.9525909592061742,
      "VeMo (max entropy view)": 0.9364548494983278,
      "VeMo (min entropy view)": 0.9525909592061742,
      "VeMo (random view)": 0.9525909592061742,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is practicing balancing on one leg."
  },
  "008443": {
    "text": "the man is moving his arms",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.688821317100468,
      "Minus Multimodal Distance": -10.902975082397461,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4262380975415e-05,
      "MoBERT-F": 0.4519381504801443,
      "MoBERT-N": 0.5697757492788718,
      "MoBERT-min(F/N)": 0.4519381504801443,
      "MoBERT-max(F/N)": 0.5697757492788718,
      "MotionCritic": -7.750913143157959,
      "VeMo (human-opt view)": 0.9975253924284395,
      "VeMo (max entropy view)": 0.9964171715197707,
      "VeMo (min entropy view)": 0.9975253924284395,
      "VeMo (random view)": 0.9975253924284395,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is moving his arms."
  },
  "008458": {
    "text": "a person walks over something by standing on it, before continuing on their way.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3482299518906038,
      "Minus Multimodal Distance": -3.5786378383636475,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005099712871015072,
      "MoBERT-F": 0.5446072526976273,
      "MoBERT-N": 0.5244642107228673,
      "MoBERT-min(F/N)": 0.5244642107228673,
      "MoBERT-max(F/N)": 0.5446072526976273,
      "MotionCritic": -6.597081661224365,
      "VeMo (human-opt view)": 0.731006160164271,
      "VeMo (max entropy view)": 0.7058823529411765,
      "VeMo (min entropy view)": 0.731006160164271,
      "VeMo (random view)": 0.7058823529411765,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps over something by standing on it before continuing on their way."
  },
  "008461": {
    "text": "person walks in a semi circular shape while swinging arms slightly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.0661697529440572,
      "Minus Multimodal Distance": -4.449593544006348,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.428274067118764e-05,
      "MoBERT-F": 0.6145556476513074,
      "MoBERT-N": 0.5914194097736583,
      "MoBERT-min(F/N)": 0.5914194097736583,
      "MoBERT-max(F/N)": 0.6145556476513074,
      "MotionCritic": -7.919942855834961,
      "VeMo (human-opt view)": 0.8672897196261682,
      "VeMo (max entropy view)": 0.8672897196261682,
      "VeMo (min entropy view)": 0.8931572629051621,
      "VeMo (random view)": 0.8672897196261682,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in a semi - circular shape while swinging their arms slightly."
  },
  "008474": {
    "text": "a person turns right while walking then stops",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0110203930114159,
      "Minus Multimodal Distance": -5.555178165435791,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.535182521678507e-05,
      "MoBERT-F": 0.5323726302165039,
      "MoBERT-N": 0.666424413061135,
      "MoBERT-min(F/N)": 0.5323726302165039,
      "MoBERT-max(F/N)": 0.666424413061135,
      "MotionCritic": -3.9312210083007812,
      "VeMo (human-opt view)": 0.9628318584070796,
      "VeMo (max entropy view)": 0.9628318584070796,
      "VeMo (min entropy view)": 0.9890129522108084,
      "VeMo (random view)": 0.9890129522108084,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person turns right while walking, then stops."
  },
  "008484": {
    "text": "a person stretches their shoulders by moving their bent arms forward and backward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8846576496589288,
      "Minus Multimodal Distance": -7.843769073486328,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00013023358769714832,
      "MoBERT-F": 0.5418267203844882,
      "MoBERT-N": 0.5562476819775635,
      "MoBERT-min(F/N)": 0.5418267203844882,
      "MoBERT-max(F/N)": 0.5562476819775635,
      "MotionCritic": -7.747317790985107,
      "VeMo (human-opt view)": 0.39244186046511625,
      "VeMo (max entropy view)": 0.39244186046511625,
      "VeMo (min entropy view)": 0.3922413793103448,
      "VeMo (random view)": 0.3922413793103448,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stretches their shoulders by moving their bent arms forward and backward."
  },
  "008500": {
    "text": "a person jumps straight up with both arms down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9990001010394255,
      "Minus Multimodal Distance": -8.507132530212402,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00020847977430094033,
      "MoBERT-F": 0.519862146244372,
      "MoBERT-N": 0.6622652638778783,
      "MoBERT-min(F/N)": 0.519862146244372,
      "MoBERT-max(F/N)": 0.6622652638778783,
      "MotionCritic": -4.977997303009033,
      "VeMo (human-opt view)": 0.02162565249813572,
      "VeMo (max entropy view)": 0.02162565249813572,
      "VeMo (min entropy view)": 0.002812348472603847,
      "VeMo (random view)": 0.002812348472603847,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps straight up with both arms down."
  },
  "008518": {
    "text": "person is leaving in a circular motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.470736910188327,
      "Minus Multimodal Distance": -11.06518268585205,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00021695159375667572,
      "MoBERT-F": 0.5456359026009306,
      "MoBERT-N": 0.48221472995093706,
      "MoBERT-min(F/N)": 0.48221472995093706,
      "MoBERT-max(F/N)": 0.5456359026009306,
      "MotionCritic": -2.889106273651123,
      "VeMo (human-opt view)": 0.008565700747220703,
      "VeMo (max entropy view)": 0.2943327239488117,
      "VeMo (min entropy view)": 0.008565700747220703,
      "VeMo (random view)": 0.2943327239488117,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is moving in a circular motion."
  },
  "008556": {
    "text": "a person gets down and crawls around the floor.a",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5662699206632043,
      "Minus Multimodal Distance": -4.206987380981445,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4035862224991433e-05,
      "MoBERT-F": 0.43781098955013487,
      "MoBERT-N": 0.5677887677524923,
      "MoBERT-min(F/N)": 0.43781098955013487,
      "MoBERT-max(F/N)": 0.5677887677524923,
      "MotionCritic": -10.07037353515625,
      "VeMo (human-opt view)": 0.995913563699369,
      "VeMo (max entropy view)": 0.995913563699369,
      "VeMo (min entropy view)": 0.9966301777005672,
      "VeMo (random view)": 0.9966301777005672,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person gets down and crawls around the floor."
  },
  "008564": {
    "text": "a person is wrinkling a paper in their hands and moving their feet from side to side",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3132420338192656,
      "Minus Multimodal Distance": -8.287664413452148,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.7548648833762854e-05,
      "MoBERT-F": 0.4902746847120584,
      "MoBERT-N": 0.4561357744400525,
      "MoBERT-min(F/N)": 0.4561357744400525,
      "MoBERT-max(F/N)": 0.4902746847120584,
      "MotionCritic": -8.68163776397705,
      "VeMo (human-opt view)": 2.0234071722155016e-05,
      "VeMo (max entropy view)": 0.0021877436065079084,
      "VeMo (min entropy view)": 2.0234071722155016e-05,
      "VeMo (random view)": 0.0021877436065079084,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is wrinkling a piece of paper in their hands and moving their feet from side to side."
  },
  "008565": {
    "text": "someone slowly and dramatically walks forward in a straight line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.805383735857394,
      "Minus Multimodal Distance": -6.625991344451904,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7457759642857127e-05,
      "MoBERT-F": 0.49718890773013935,
      "MoBERT-N": 0.613300310054423,
      "MoBERT-min(F/N)": 0.49718890773013935,
      "MoBERT-max(F/N)": 0.613300310054423,
      "MotionCritic": -7.417003631591797,
      "VeMo (human-opt view)": 0.9152129817444219,
      "VeMo (max entropy view)": 0.9152129817444219,
      "VeMo (min entropy view)": 0.9152129817444219,
      "VeMo (random view)": 0.9152129817444219,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone walks forward slowly and dramatically in a straight line."
  },
  "008567": {
    "text": "a person shifts their weight from side to side, first settling back on their left foot, then their right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8354694180486338,
      "Minus Multimodal Distance": -10.881189346313477,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00012700729712378234,
      "MoBERT-F": 0.559075491472513,
      "MoBERT-N": 0.5789369667124304,
      "MoBERT-min(F/N)": 0.559075491472513,
      "MoBERT-max(F/N)": 0.5789369667124304,
      "MotionCritic": -5.176884174346924,
      "VeMo (human-opt view)": 0.7549407114624506,
      "VeMo (max entropy view)": 0.7063492063492064,
      "VeMo (min entropy view)": 0.7549407114624506,
      "VeMo (random view)": 0.7549407114624506,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person shifts their weight from side to side, first settling back on their left foot, then their right."
  },
  "008583": {
    "text": "a person walks in a s shape",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5078891052491337,
      "Minus Multimodal Distance": -3.0467185974121094,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.473847234796267e-05,
      "MoBERT-F": 0.49432379570362434,
      "MoBERT-N": 0.5600338835969624,
      "MoBERT-min(F/N)": 0.49432379570362434,
      "MoBERT-max(F/N)": 0.5600338835969624,
      "MotionCritic": -5.318166255950928,
      "VeMo (human-opt view)": 0.6363636363636364,
      "VeMo (max entropy view)": 0.6363636363636364,
      "VeMo (min entropy view)": 0.6503667481662592,
      "VeMo (random view)": 0.6503667481662592,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in an S shape."
  },
  "008597": {
    "text": "a person is washing a window",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9442740761380021,
      "Minus Multimodal Distance": -2.493083953857422,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5561346774338745e-05,
      "MoBERT-F": 0.31972130527101006,
      "MoBERT-N": 0.3408539308429878,
      "MoBERT-min(F/N)": 0.31972130527101006,
      "MoBERT-max(F/N)": 0.3408539308429878,
      "MotionCritic": -5.5657639503479,
      "VeMo (human-opt view)": 3.854837113857754e-06,
      "VeMo (max entropy view)": 3.854837113857754e-06,
      "VeMo (min entropy view)": 2.9972531537233923e-06,
      "VeMo (random view)": 3.854837113857754e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is washing a window."
  },
  "008614": {
    "text": "a man slowly leans forward and moves around and carries a heavy object.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6844684710597126,
      "Minus Multimodal Distance": -9.195587158203125,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.525028321542777e-05,
      "MoBERT-F": 0.4094007635636111,
      "MoBERT-N": 0.5208190593049293,
      "MoBERT-min(F/N)": 0.4094007635636111,
      "MoBERT-max(F/N)": 0.5208190593049293,
      "MotionCritic": -9.378935813903809,
      "VeMo (human-opt view)": 9.93496853453537e-05,
      "VeMo (max entropy view)": 9.93496853453537e-05,
      "VeMo (min entropy view)": 8.527472674466238e-05,
      "VeMo (random view)": 9.93496853453537e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man slowly leans forward, moves around, and carries a heavy object."
  },
  "008620": {
    "text": "the person kicks a ball with their left foot, their arms raised to a t-pose as they do so.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1020679248653287,
      "Minus Multimodal Distance": -10.915215492248535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2614653062191792e-05,
      "MoBERT-F": 0.396770456533484,
      "MoBERT-N": 0.5261195799795906,
      "MoBERT-min(F/N)": 0.396770456533484,
      "MoBERT-max(F/N)": 0.5261195799795906,
      "MotionCritic": -3.77492618560791,
      "VeMo (human-opt view)": 0.00033599847546960385,
      "VeMo (max entropy view)": 0.00033599847546960385,
      "VeMo (min entropy view)": 0.00024575241196797526,
      "VeMo (random view)": 0.00024575241196797526,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person kicks a ball with their left foot, and their arms are raised to a T - pose as they do so."
  },
  "008633": {
    "text": "a man confidently walks down a ramp with a measured pace.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9496749461439847,
      "Minus Multimodal Distance": -10.947876930236816,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.213606295408681e-05,
      "MoBERT-F": 0.6083761934650636,
      "MoBERT-N": 0.6009076262348648,
      "MoBERT-min(F/N)": 0.6009076262348648,
      "MoBERT-max(F/N)": 0.6083761934650636,
      "MotionCritic": -9.997675895690918,
      "VeMo (human-opt view)": 0.7181818181818181,
      "VeMo (max entropy view)": 0.6218274111675127,
      "VeMo (min entropy view)": 0.7181818181818181,
      "VeMo (random view)": 0.7181818181818181,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man confidently walks down a ramp at a measured pace."
  },
  "008668": {
    "text": "a person waving one hands in a horizontal circular motion in front of them",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4222137118073152,
      "Minus Multimodal Distance": -9.585284233093262,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6904643164016306e-05,
      "MoBERT-F": 0.3755778791452531,
      "MoBERT-N": 0.4298460455849805,
      "MoBERT-min(F/N)": 0.3755778791452531,
      "MoBERT-max(F/N)": 0.4298460455849805,
      "MotionCritic": -7.2677388191223145,
      "VeMo (human-opt view)": 0.7879924953095685,
      "VeMo (max entropy view)": 0.7879924953095685,
      "VeMo (min entropy view)": 0.8080808080808081,
      "VeMo (random view)": 0.7879924953095685,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is waving one hand in a horizontal circular motion in front of them."
  },
  "008694": {
    "text": "a person walks over and across a beam.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4538689355557286,
      "Minus Multimodal Distance": -9.875633239746094,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.6752602458000183,
      "MoBERT-F": 0.6739632482363919,
      "MoBERT-N": 0.617128467689597,
      "MoBERT-min(F/N)": 0.617128467689597,
      "MoBERT-max(F/N)": 0.6739632482363919,
      "MotionCritic": -3.895392894744873,
      "VeMo (human-opt view)": 0.45426829268292684,
      "VeMo (max entropy view)": 0.45426829268292684,
      "VeMo (min entropy view)": 0.39226519337016574,
      "VeMo (random view)": 0.45426829268292684,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks across a beam."
  },
  "008696": {
    "text": "a person stands on one leg in yoga pose.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9440254203349204,
      "Minus Multimodal Distance": -3.724064826965332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4008599211811088e-05,
      "MoBERT-F": 0.4128099372287535,
      "MoBERT-N": 0.48881611683256165,
      "MoBERT-min(F/N)": 0.4128099372287535,
      "MoBERT-max(F/N)": 0.48881611683256165,
      "MotionCritic": -15.111305236816406,
      "VeMo (human-opt view)": 0.6797385620915033,
      "VeMo (max entropy view)": 0.48427672955974843,
      "VeMo (min entropy view)": 0.6797385620915033,
      "VeMo (random view)": 0.48427672955974843,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands on one leg in a yoga pose."
  },
  "008708": {
    "text": "walking backwards and stopping.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8144707288805659,
      "Minus Multimodal Distance": -6.193142414093018,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.041712623089551926,
      "MoBERT-F": 0.5054208815077086,
      "MoBERT-N": 0.4541243042750107,
      "MoBERT-min(F/N)": 0.4541243042750107,
      "MoBERT-max(F/N)": 0.5054208815077086,
      "MotionCritic": -11.56169605255127,
      "VeMo (human-opt view)": 0.8743169398907104,
      "VeMo (max entropy view)": 0.8743169398907104,
      "VeMo (min entropy view)": 0.9924050632911392,
      "VeMo (random view)": 0.8743169398907104,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking backwards and stopping."
  },
  "008725": {
    "text": "a man walks forward in a straight line.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4027284151883335,
      "Minus Multimodal Distance": -10.092202186584473,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.346205110370647e-05,
      "MoBERT-F": 0.4651127783193841,
      "MoBERT-N": 0.6363938957255606,
      "MoBERT-min(F/N)": 0.4651127783193841,
      "MoBERT-max(F/N)": 0.6363938957255606,
      "MotionCritic": -14.134631156921387,
      "VeMo (human-opt view)": 0.98408229561451,
      "VeMo (max entropy view)": 0.98408229561451,
      "VeMo (min entropy view)": 0.9867928005405737,
      "VeMo (random view)": 0.9867928005405737,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man walks forward in a straight line."
  },
  "008730": {
    "text": "person is doing a huge stretch  to the right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.344172503505537,
      "Minus Multimodal Distance": -11.216343879699707,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00015999015886336565,
      "MoBERT-F": 0.5626919546792148,
      "MoBERT-N": 0.5582713205996439,
      "MoBERT-min(F/N)": 0.5582713205996439,
      "MoBERT-max(F/N)": 0.5626919546792148,
      "MotionCritic": -8.3997802734375,
      "VeMo (human-opt view)": 0.9049773755656109,
      "VeMo (max entropy view)": 0.9049773755656109,
      "VeMo (min entropy view)": 0.9436298468556533,
      "VeMo (random view)": 0.9049773755656109,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is doing a huge stretch to the right."
  },
  "008739": {
    "text": "a person jumps forward once.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8523930790723858,
      "Minus Multimodal Distance": -2.473994016647339,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3217016860144213e-05,
      "MoBERT-F": 0.4518320316876786,
      "MoBERT-N": 0.5735230087292775,
      "MoBERT-min(F/N)": 0.4518320316876786,
      "MoBERT-max(F/N)": 0.5735230087292775,
      "MotionCritic": -1.0402047634124756,
      "VeMo (human-opt view)": 0.8600556070435589,
      "VeMo (max entropy view)": 0.6218274111675127,
      "VeMo (min entropy view)": 0.8600556070435589,
      "VeMo (random view)": 0.8600556070435589,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps forward once."
  },
  "008782": {
    "text": "a person walks forwards stiffly with both arms out in front of them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9263292135647054,
      "Minus Multimodal Distance": -7.378607273101807,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.495597072993405e-05,
      "MoBERT-F": 0.5296651942132401,
      "MoBERT-N": 0.5736310648325653,
      "MoBERT-min(F/N)": 0.5296651942132401,
      "MoBERT-max(F/N)": 0.5736310648325653,
      "MotionCritic": -5.377802848815918,
      "VeMo (human-opt view)": 0.008567069592770801,
      "VeMo (max entropy view)": 0.008567069592770801,
      "VeMo (min entropy view)": 0.007605405006276305,
      "VeMo (random view)": 0.007605405006276305,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward stiffly with both arms out in front of them."
  },
  "008801": {
    "text": "a man jumps forward with his arms at his sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9155333311434278,
      "Minus Multimodal Distance": -11.697664260864258,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.686566434334964e-05,
      "MoBERT-F": 0.554765459098377,
      "MoBERT-N": 0.6917003818222861,
      "MoBERT-min(F/N)": 0.554765459098377,
      "MoBERT-max(F/N)": 0.6917003818222861,
      "MotionCritic": -8.402975082397461,
      "VeMo (human-opt view)": 0.5922619047619048,
      "VeMo (max entropy view)": 0.5922619047619048,
      "VeMo (min entropy view)": 0.11926605504587157,
      "VeMo (random view)": 0.11926605504587157,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man jumps forward with his arms at his sides."
  },
  "008803": {
    "text": "a person holds their arms out to each side, teetering them up and down slightly. then, they begin to rotate their arms in slow, wide circles,",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6760865386604932,
      "Minus Multimodal Distance": -6.216876983642578,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8805592592107132e-05,
      "MoBERT-F": 0.46019334769873177,
      "MoBERT-N": 0.5212799925238099,
      "MoBERT-min(F/N)": 0.46019334769873177,
      "MoBERT-max(F/N)": 0.5212799925238099,
      "MotionCritic": -10.20145034790039,
      "VeMo (human-opt view)": 0.608955223880597,
      "VeMo (max entropy view)": 0.608955223880597,
      "VeMo (min entropy view)": 0.6225352112676056,
      "VeMo (random view)": 0.608955223880597,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person holds their arms out to each side, teetering them up and down slightly. Then, they begin to rotate their arms in slow, wide circles."
  },
  "008822": {
    "text": "a person walks forward slowly without moving forward, as though walking on a treadmill, and his arms remain still at his sides.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0134382227850647,
      "Minus Multimodal Distance": -6.808028697967529,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.08006204664707184,
      "MoBERT-F": 0.7360500579332576,
      "MoBERT-N": 0.6204346517178954,
      "MoBERT-min(F/N)": 0.6204346517178954,
      "MoBERT-max(F/N)": 0.7360500579332576,
      "MotionCritic": -3.5170955657958984,
      "VeMo (human-opt view)": 0.7185840707964601,
      "VeMo (max entropy view)": 0.7185430463576159,
      "VeMo (min entropy view)": 0.7185840707964601,
      "VeMo (random view)": 0.7185840707964601,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward slowly but doesn't actually move forward, as if walking on a treadmill, and his arms remain stationary at his sides."
  },
  "008824": {
    "text": "the person waddles low to the ground and then stands up and walks back.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6144659015722038,
      "Minus Multimodal Distance": -8.611666679382324,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.005352742504328489,
      "MoBERT-F": 0.6008487974153565,
      "MoBERT-N": 0.6059457711662752,
      "MoBERT-min(F/N)": 0.6008487974153565,
      "MoBERT-max(F/N)": 0.6059457711662752,
      "MotionCritic": -7.622389316558838,
      "VeMo (human-opt view)": 0.8990825688073395,
      "VeMo (max entropy view)": 0.7983539094650206,
      "VeMo (min entropy view)": 0.8990825688073395,
      "VeMo (random view)": 0.8990825688073395,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person waddles close to the ground, then stands up and walks back."
  },
  "008838": {
    "text": "person picked up his phone made a call and then put it back down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.181222966813267,
      "Minus Multimodal Distance": -7.413156509399414,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.1915416659321636e-05,
      "MoBERT-F": 0.4158096477895705,
      "MoBERT-N": 0.5564406909084184,
      "MoBERT-min(F/N)": 0.4158096477895705,
      "MoBERT-max(F/N)": 0.5564406909084184,
      "MotionCritic": -9.577289581298828,
      "VeMo (human-opt view)": 0.0009700402510954765,
      "VeMo (max entropy view)": 0.014965324248692055,
      "VeMo (min entropy view)": 0.0009700402510954765,
      "VeMo (random view)": 0.0009700402510954765,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person picked up his phone, made a call, and then put it back down."
  },
  "008840": {
    "text": "a person who seems to evade something from their left side and run at a insane pace",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9656796516292045,
      "Minus Multimodal Distance": -7.6498494148254395,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.1134379241848364e-05,
      "MoBERT-F": 0.504788540099583,
      "MoBERT-N": 0.5662642153664372,
      "MoBERT-min(F/N)": 0.504788540099583,
      "MoBERT-max(F/N)": 0.5662642153664372,
      "MotionCritic": -1.7991454601287842,
      "VeMo (human-opt view)": 0.0001090284468246322,
      "VeMo (max entropy view)": 0.0004880429477794046,
      "VeMo (min entropy view)": 0.0001090284468246322,
      "VeMo (random view)": 0.0001090284468246322,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who seems to evade something from their left side and run at an insane pace."
  },
  "008843": {
    "text": "a person lifts and spins around their right leg then lifts and spins around their left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9025789808384288,
      "Minus Multimodal Distance": -8.698315620422363,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5967630790546536e-05,
      "MoBERT-F": 0.40707995504423344,
      "MoBERT-N": 0.6134943861196551,
      "MoBERT-min(F/N)": 0.40707995504423344,
      "MoBERT-max(F/N)": 0.6134943861196551,
      "MotionCritic": -12.758243560791016,
      "VeMo (human-opt view)": 0.08982035928143713,
      "VeMo (max entropy view)": 0.36245954692556637,
      "VeMo (min entropy view)": 0.08982035928143713,
      "VeMo (random view)": 0.08982035928143713,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts and spins around their right leg, then lifts and spins around their left leg."
  },
  "008862": {
    "text": "someone jumps up twice, bringing their hands together in front of them as they go upwards and releasing them as they go downwards.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8012950830948232,
      "Minus Multimodal Distance": -9.674857139587402,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0004574168415274471,
      "MoBERT-F": 0.624331989245014,
      "MoBERT-N": 0.5909549849743205,
      "MoBERT-min(F/N)": 0.5909549849743205,
      "MoBERT-max(F/N)": 0.624331989245014,
      "MotionCritic": -13.58045768737793,
      "VeMo (human-opt view)": 0.2682926829268293,
      "VeMo (max entropy view)": 0.2682926829268293,
      "VeMo (min entropy view)": 0.009107468123861567,
      "VeMo (random view)": 0.009107468123861567,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone jumps up twice, bringing their hands together in front of them as they go upward and releasing them as they go downward."
  },
  "008888": {
    "text": "person is doing across punch to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9738096096756707,
      "Minus Multimodal Distance": -4.495030879974365,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2520991478813812e-05,
      "MoBERT-F": 0.3678327236541797,
      "MoBERT-N": 0.4669596692285939,
      "MoBERT-min(F/N)": 0.3678327236541797,
      "MoBERT-max(F/N)": 0.4669596692285939,
      "MotionCritic": -7.777894496917725,
      "VeMo (human-opt view)": 0.6376146788990825,
      "VeMo (max entropy view)": 0.6376146788990825,
      "VeMo (min entropy view)": 0.6651982378854625,
      "VeMo (random view)": 0.6376146788990825,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is doing a cross punch to the right."
  },
  "008904": {
    "text": "a person bends over and then picks up something with his left hand and then the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.089036397348988,
      "Minus Multimodal Distance": -9.905248641967773,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5896753868437372e-05,
      "MoBERT-F": 0.422251809559378,
      "MoBERT-N": 0.49224932188787895,
      "MoBERT-min(F/N)": 0.422251809559378,
      "MoBERT-max(F/N)": 0.49224932188787895,
      "MotionCritic": -12.146175384521484,
      "VeMo (human-opt view)": 5.590975420208394e-06,
      "VeMo (max entropy view)": 1.380538046538664e-05,
      "VeMo (min entropy view)": 5.590975420208394e-06,
      "VeMo (random view)": 1.380538046538664e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends over, picks up something with their left hand, and then with their right."
  },
  "008905": {
    "text": "a person raises their left arm up to their face.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4610373367028328,
      "Minus Multimodal Distance": -9.499384880065918,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5948222173610702e-05,
      "MoBERT-F": 0.39188617199520387,
      "MoBERT-N": 0.51541802772094,
      "MoBERT-min(F/N)": 0.39188617199520387,
      "MoBERT-max(F/N)": 0.51541802772094,
      "MotionCritic": -8.626174926757812,
      "VeMo (human-opt view)": 0.8936454849498328,
      "VeMo (max entropy view)": 0.8936454849498328,
      "VeMo (min entropy view)": 0.9706019598693421,
      "VeMo (random view)": 0.8936454849498328,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises their left arm up to their face. "
  },
  "008927": {
    "text": "person standing raises right knee upward, then puts foot back down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7951128752238232,
      "Minus Multimodal Distance": -8.61527156829834,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6135450752917677e-05,
      "MoBERT-F": 0.3858897138890241,
      "MoBERT-N": 0.5983023744373902,
      "MoBERT-min(F/N)": 0.3858897138890241,
      "MoBERT-max(F/N)": 0.5983023744373902,
      "MotionCritic": -5.986139297485352,
      "VeMo (human-opt view)": 0.9198184568835098,
      "VeMo (max entropy view)": 0.7543859649122807,
      "VeMo (min entropy view)": 0.9198184568835098,
      "VeMo (random view)": 0.7543859649122807,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person standing raises their right knee upward, then puts their foot back down."
  },
  "008936": {
    "text": "the person is dribbling a basketball backwards",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6065522599528883,
      "Minus Multimodal Distance": -10.334975242614746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9570797681808472,
      "MoBERT-F": 0.7705246730283261,
      "MoBERT-N": 0.7528861767016485,
      "MoBERT-min(F/N)": 0.7528861767016485,
      "MoBERT-max(F/N)": 0.7705246730283261,
      "MotionCritic": -9.655265808105469,
      "VeMo (human-opt view)": 0.007559604574529948,
      "VeMo (max entropy view)": 0.02158273381294964,
      "VeMo (min entropy view)": 0.007559604574529948,
      "VeMo (random view)": 0.02158273381294964,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is dribbling a basketball backwards."
  },
  "008955": {
    "text": "man left foot stump one then stay in standing position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.056399121013332,
      "Minus Multimodal Distance": -6.876729965209961,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.482193667674437e-05,
      "MoBERT-F": 0.43426293923505904,
      "MoBERT-N": 0.48213843299668235,
      "MoBERT-min(F/N)": 0.43426293923505904,
      "MoBERT-max(F/N)": 0.48213843299668235,
      "MotionCritic": -11.448648452758789,
      "VeMo (human-opt view)": 0.5154639175257731,
      "VeMo (max entropy view)": 0.5154639175257731,
      "VeMo (min entropy view)": 0.08506075768406005,
      "VeMo (random view)": 0.08506075768406005,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man's left foot was amputated at the stump, and then he remained in a standing position."
  },
  "008971": {
    "text": "a person walks around in a circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.073106129283247,
      "Minus Multimodal Distance": -6.049375057220459,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.371692481683567e-05,
      "MoBERT-F": 0.5707220474540649,
      "MoBERT-N": 0.5675732417448445,
      "MoBERT-min(F/N)": 0.5675732417448445,
      "MoBERT-max(F/N)": 0.5707220474540649,
      "MotionCritic": -6.599801540374756,
      "VeMo (human-opt view)": 0.808199121522694,
      "VeMo (max entropy view)": 0.7769347496206374,
      "VeMo (min entropy view)": 0.808199121522694,
      "VeMo (random view)": 0.7769347496206374,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks around in a circle."
  },
  "008998": {
    "text": "the person did a kick spin to the right",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7474275574571903,
      "Minus Multimodal Distance": -9.391566276550293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9884243011474609,
      "MoBERT-F": 0.7326838347184279,
      "MoBERT-N": 0.71426300424614,
      "MoBERT-min(F/N)": 0.71426300424614,
      "MoBERT-max(F/N)": 0.7326838347184279,
      "MotionCritic": -11.403631210327148,
      "VeMo (human-opt view)": 0.5778364116094987,
      "VeMo (max entropy view)": 0.5778364116094987,
      "VeMo (min entropy view)": 0.7774193548387097,
      "VeMo (random view)": 0.5778364116094987,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person did a kick spin to the right."
  },
  "009031": {
    "text": "a person takes small steps while holding something in both of their arms angled down towards the ground",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7800752486598674,
      "Minus Multimodal Distance": -9.08840560913086,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.10600551962852478,
      "MoBERT-F": 0.621303768119962,
      "MoBERT-N": 0.5278162139114675,
      "MoBERT-min(F/N)": 0.5278162139114675,
      "MoBERT-max(F/N)": 0.621303768119962,
      "MotionCritic": -17.216678619384766,
      "VeMo (human-opt view)": 0.28038897893030795,
      "VeMo (max entropy view)": 0.28038897893030795,
      "VeMo (min entropy view)": 0.05668934240362812,
      "VeMo (random view)": 0.05668934240362812,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person takes small steps while holding something in both arms, with the arms angled down towards the ground."
  },
  "009041": {
    "text": "a person does a throwing motion with his right arm",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.803248319047201,
      "Minus Multimodal Distance": -4.142184257507324,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0004926174296997488,
      "MoBERT-F": 0.526798831880122,
      "MoBERT-N": 0.5970233487691814,
      "MoBERT-min(F/N)": 0.526798831880122,
      "MoBERT-max(F/N)": 0.5970233487691814,
      "MotionCritic": -8.242656707763672,
      "VeMo (human-opt view)": 0.8522727272727273,
      "VeMo (max entropy view)": 0.8522727272727273,
      "VeMo (min entropy view)": 0.8741463414634146,
      "VeMo (random view)": 0.8741463414634146,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person makes a throwing motion with their right arm."
  },
  "009072": {
    "text": "the person was flying around like a fly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.2454300480284117,
      "Minus Multimodal Distance": -11.094409942626953,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00021796755027025938,
      "MoBERT-F": 0.6305988564270048,
      "MoBERT-N": 0.5854972889442608,
      "MoBERT-min(F/N)": 0.5854972889442608,
      "MoBERT-max(F/N)": 0.6305988564270048,
      "MotionCritic": -9.454326629638672,
      "VeMo (human-opt view)": 5.086237150893398e-06,
      "VeMo (max entropy view)": 2.206158408588118e-05,
      "VeMo (min entropy view)": 5.086237150893398e-06,
      "VeMo (random view)": 5.086237150893398e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was flying around like a fly."
  },
  "009084": {
    "text": "a person is bent forward with arms dangling in front of them",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7152855999624461,
      "Minus Multimodal Distance": -6.6118950843811035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3659820726606995e-05,
      "MoBERT-F": 0.37723476173049947,
      "MoBERT-N": 0.49661856747965233,
      "MoBERT-min(F/N)": 0.37723476173049947,
      "MoBERT-max(F/N)": 0.49661856747965233,
      "MotionCritic": -3.980564594268799,
      "VeMo (human-opt view)": 0.9707214005433142,
      "VeMo (max entropy view)": 0.9398907103825137,
      "VeMo (min entropy view)": 0.9707214005433142,
      "VeMo (random view)": 0.9707214005433142,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is bent forward with their arms dangling in front of them."
  },
  "009096": {
    "text": "a person is standing still, then walks forward and kicks something on the floor with his right foot",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6744047374884051,
      "Minus Multimodal Distance": -3.5524537563323975,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.892547607421875,
      "MoBERT-F": 0.7351464305883391,
      "MoBERT-N": 0.6070681319745738,
      "MoBERT-min(F/N)": 0.6070681319745738,
      "MoBERT-max(F/N)": 0.7351464305883391,
      "MotionCritic": -8.126420021057129,
      "VeMo (human-opt view)": 0.9239189928845101,
      "VeMo (max entropy view)": 0.9239189928845101,
      "VeMo (min entropy view)": 0.9240246406570842,
      "VeMo (random view)": 0.9240246406570842,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing still. Then, he walks forward and kicks something on the floor with his right foot."
  },
  "009123": {
    "text": "a person moves forward quickly and lifts both legs before landing and continuing to move forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.461843058756034,
      "Minus Multimodal Distance": -7.772768974304199,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001457057660445571,
      "MoBERT-F": 0.592116855747049,
      "MoBERT-N": 0.7407728427258393,
      "MoBERT-min(F/N)": 0.592116855747049,
      "MoBERT-max(F/N)": 0.7407728427258393,
      "MotionCritic": -10.872123718261719,
      "VeMo (human-opt view)": 0.7659574468085106,
      "VeMo (max entropy view)": 0.546448087431694,
      "VeMo (min entropy view)": 0.7659574468085106,
      "VeMo (random view)": 0.546448087431694,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves forward rapidly, lifts both legs before landing, and then continues to move forward."
  },
  "009135": {
    "text": "a person walks in a counter clockwise circle then sits down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.0097810129635256,
      "Minus Multimodal Distance": -4.744020938873291,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.307831891812384e-05,
      "MoBERT-F": 0.4448805465414109,
      "MoBERT-N": 0.4712948277245581,
      "MoBERT-min(F/N)": 0.4448805465414109,
      "MoBERT-max(F/N)": 0.4712948277245581,
      "MotionCritic": -9.308561325073242,
      "VeMo (human-opt view)": 0.3495575221238938,
      "VeMo (max entropy view)": 0.3495575221238938,
      "VeMo (min entropy view)": 0.25704225352112675,
      "VeMo (random view)": 0.3495575221238938,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in a counter - clockwise circle and then sits down."
  },
  "009140": {
    "text": "a figure lunges backwards in a flexed pose.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3938262570615745,
      "Minus Multimodal Distance": -5.598520278930664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.016621220856904984,
      "MoBERT-F": 0.5028233810773044,
      "MoBERT-N": 0.503539448779442,
      "MoBERT-min(F/N)": 0.5028233810773044,
      "MoBERT-max(F/N)": 0.503539448779442,
      "MotionCritic": -5.251615047454834,
      "VeMo (human-opt view)": 0.5631768953068592,
      "VeMo (max entropy view)": 0.5631768953068592,
      "VeMo (min entropy view)": 0.7431906614785992,
      "VeMo (random view)": 0.5631768953068592,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure lunges backward in a flexed pose."
  },
  "009171": {
    "text": "a person sits down to avoid pressure on their knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3877839022497949,
      "Minus Multimodal Distance": -7.107057571411133,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4689355996088125e-05,
      "MoBERT-F": 0.38125775667399764,
      "MoBERT-N": 0.4425170264202987,
      "MoBERT-min(F/N)": 0.38125775667399764,
      "MoBERT-max(F/N)": 0.4425170264202987,
      "MotionCritic": -11.773435592651367,
      "VeMo (human-opt view)": 0.8348170128585559,
      "VeMo (max entropy view)": 0.8348170128585559,
      "VeMo (min entropy view)": 0.8670967741935484,
      "VeMo (random view)": 0.8348170128585559,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down to avoid pressure on their knees."
  },
  "009181": {
    "text": "a person does a swimming motion while standing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.1568303134060365,
      "Minus Multimodal Distance": -12.893543243408203,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 9.556033910484985e-05,
      "MoBERT-F": 0.546122032273226,
      "MoBERT-N": 0.5040128250783146,
      "MoBERT-min(F/N)": 0.5040128250783146,
      "MoBERT-max(F/N)": 0.546122032273226,
      "MotionCritic": -6.411703586578369,
      "VeMo (human-opt view)": 0.025954613398559586,
      "VeMo (max entropy view)": 0.025954613398559586,
      "VeMo (min entropy view)": 0.010309278350515464,
      "VeMo (random view)": 0.010309278350515464,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does a swimming motion while standing."
  },
  "009184": {
    "text": "a person raises both hands and places them in front of themselves",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5539639568332554,
      "Minus Multimodal Distance": -5.44073486328125,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4993913029902615e-05,
      "MoBERT-F": 0.33695643500020345,
      "MoBERT-N": 0.39761212668140083,
      "MoBERT-min(F/N)": 0.33695643500020345,
      "MoBERT-max(F/N)": 0.39761212668140083,
      "MotionCritic": -3.6546850204467773,
      "VeMo (human-opt view)": 0.9100443727309399,
      "VeMo (max entropy view)": 0.7307692307692307,
      "VeMo (min entropy view)": 0.9100443727309399,
      "VeMo (random view)": 0.9100443727309399,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises both hands and places them in front of themselves."
  },
  "009199": {
    "text": "the person is trying to talk with his hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7836533722172415,
      "Minus Multimodal Distance": -13.422903060913086,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00041603160207159817,
      "MoBERT-F": 0.5723548860787913,
      "MoBERT-N": 0.5727287486270556,
      "MoBERT-min(F/N)": 0.5723548860787913,
      "MoBERT-max(F/N)": 0.5727287486270556,
      "MotionCritic": -2.2065517902374268,
      "VeMo (human-opt view)": 0.7883817427385892,
      "VeMo (max entropy view)": 0.7316017316017316,
      "VeMo (min entropy view)": 0.7883817427385892,
      "VeMo (random view)": 0.7883817427385892,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is trying to talk with his hands."
  },
  "009216": {
    "text": "a person who seems to put slippers on",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9147777520090138,
      "Minus Multimodal Distance": -10.97420883178711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3705668354523368e-05,
      "MoBERT-F": 0.44883763205501326,
      "MoBERT-N": 0.4749545685940314,
      "MoBERT-min(F/N)": 0.44883763205501326,
      "MoBERT-max(F/N)": 0.4749545685940314,
      "MotionCritic": -2.0553388595581055,
      "VeMo (human-opt view)": 7.463614877472322e-05,
      "VeMo (max entropy view)": 0.0004055902713330171,
      "VeMo (min entropy view)": 7.463614877472322e-05,
      "VeMo (random view)": 7.463614877472322e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who seems to be putting on slippers."
  },
  "009220": {
    "text": "a hunched individual slowly wobbles forward in a drunken manner.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7497999187501299,
      "Minus Multimodal Distance": -11.456228256225586,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.255206709378399e-05,
      "MoBERT-F": 0.4292723595924224,
      "MoBERT-N": 0.4851025809777016,
      "MoBERT-min(F/N)": 0.4292723595924224,
      "MoBERT-max(F/N)": 0.4851025809777016,
      "MotionCritic": -7.0326337814331055,
      "VeMo (human-opt view)": 0.899188876013905,
      "VeMo (max entropy view)": 0.6371308016877637,
      "VeMo (min entropy view)": 0.899188876013905,
      "VeMo (random view)": 0.6371308016877637,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A hunched individual slowly wobbles forward in a drunken manner."
  },
  "009244": {
    "text": "person standing with both feet firmly planted on the ground upper body moved very slightly",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7633293642850029,
      "Minus Multimodal Distance": -6.601136684417725,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.297186802024953e-05,
      "MoBERT-F": 0.423312050047563,
      "MoBERT-N": 0.5533570963231169,
      "MoBERT-min(F/N)": 0.423312050047563,
      "MoBERT-max(F/N)": 0.5533570963231169,
      "MotionCritic": -3.7029190063476562,
      "VeMo (human-opt view)": 0.8873321657910099,
      "VeMo (max entropy view)": 0.8264182895850973,
      "VeMo (min entropy view)": 0.8873321657910099,
      "VeMo (random view)": 0.8873321657910099,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing with both feet firmly planted on the ground, and their upper body moves very slightly."
  },
  "009252": {
    "text": "the man moves to his right",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.906742511702746,
      "Minus Multimodal Distance": -6.368280410766602,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.744398782146163e-05,
      "MoBERT-F": 0.44779901840748,
      "MoBERT-N": 0.5827615500541135,
      "MoBERT-min(F/N)": 0.44779901840748,
      "MoBERT-max(F/N)": 0.5827615500541135,
      "MotionCritic": -4.240560054779053,
      "VeMo (human-opt view)": 0.8991150442477877,
      "VeMo (max entropy view)": 0.8519480519480519,
      "VeMo (min entropy view)": 0.8991150442477877,
      "VeMo (random view)": 0.8991150442477877,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man moves to his right."
  },
  "009283": {
    "text": "someone gets up from the floor, seems to be talking on the phone and starts to walk",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.291392332773695,
      "Minus Multimodal Distance": -4.571803569793701,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 9.219649655278772e-05,
      "MoBERT-F": 0.3844133917718212,
      "MoBERT-N": 0.4217293060597632,
      "MoBERT-min(F/N)": 0.3844133917718212,
      "MoBERT-max(F/N)": 0.4217293060597632,
      "MotionCritic": -11.038492202758789,
      "VeMo (human-opt view)": 0.28205128205128205,
      "VeMo (max entropy view)": 0.28205128205128205,
      "VeMo (min entropy view)": 0.26903553299492383,
      "VeMo (random view)": 0.26903553299492383,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone gets up from the floor, seems to be talking on the phone, and starts to walk."
  },
  "009289": {
    "text": "the person walked forward and leaned over the table.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7368827475694925,
      "Minus Multimodal Distance": -5.973474979400635,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.197512215701863e-05,
      "MoBERT-F": 0.49115426639587323,
      "MoBERT-N": 0.5529044208123763,
      "MoBERT-min(F/N)": 0.49115426639587323,
      "MoBERT-max(F/N)": 0.5529044208123763,
      "MotionCritic": -6.0759782791137695,
      "VeMo (human-opt view)": 0.04477611940298507,
      "VeMo (max entropy view)": 0.04477611940298507,
      "VeMo (min entropy view)": 0.03301034652652324,
      "VeMo (random view)": 0.04477611940298507,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walked forward and leaned over the table."
  },
  "009302": {
    "text": "person is running in place and then stands still",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7389243755223975,
      "Minus Multimodal Distance": -10.314910888671875,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.79347727377899e-05,
      "MoBERT-F": 0.3662141764866864,
      "MoBERT-N": 0.4653722277121392,
      "MoBERT-min(F/N)": 0.3662141764866864,
      "MoBERT-max(F/N)": 0.4653722277121392,
      "MotionCritic": -4.459060192108154,
      "VeMo (human-opt view)": 0.8171275646743978,
      "VeMo (max entropy view)": 0.3785310734463277,
      "VeMo (min entropy view)": 0.8171275646743978,
      "VeMo (random view)": 0.8171275646743978,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is running in place and then stands still."
  },
  "009331": {
    "text": "a man moves something from one spot to another.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9081708053885869,
      "Minus Multimodal Distance": -7.254873275756836,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3512717234552838e-05,
      "MoBERT-F": 0.42738743687123876,
      "MoBERT-N": 0.4476412868126479,
      "MoBERT-min(F/N)": 0.42738743687123876,
      "MoBERT-max(F/N)": 0.4476412868126479,
      "MotionCritic": -3.1973981857299805,
      "VeMo (human-opt view)": 0.7550077041602465,
      "VeMo (max entropy view)": 0.7432624113475177,
      "VeMo (min entropy view)": 0.7550077041602465,
      "VeMo (random view)": 0.7550077041602465,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man moves something from one spot to another."
  },
  "009349": {
    "text": "a man crawls forward on his stomach.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6580920513251447,
      "Minus Multimodal Distance": -7.203741073608398,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3460730517399497e-05,
      "MoBERT-F": 0.476099573676082,
      "MoBERT-N": 0.5353945811069778,
      "MoBERT-min(F/N)": 0.476099573676082,
      "MoBERT-max(F/N)": 0.5353945811069778,
      "MotionCritic": -6.683158874511719,
      "VeMo (human-opt view)": 0.9362999085644621,
      "VeMo (max entropy view)": 0.9325626204238922,
      "VeMo (min entropy view)": 0.9362999085644621,
      "VeMo (random view)": 0.9325626204238922,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man crawls forward on his stomach."
  },
  "009351": {
    "text": "a person squats to almost parallel then jumps to the horizontally to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.907214852561126,
      "Minus Multimodal Distance": -8.52349853515625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00018368224846199155,
      "MoBERT-F": 0.5464023855478527,
      "MoBERT-N": 0.5605956438555437,
      "MoBERT-min(F/N)": 0.5464023855478527,
      "MoBERT-max(F/N)": 0.5605956438555437,
      "MotionCritic": -9.597518920898438,
      "VeMo (human-opt view)": 0.546583850931677,
      "VeMo (max entropy view)": 0.546583850931677,
      "VeMo (min entropy view)": 0.6797202797202797,
      "VeMo (random view)": 0.546583850931677,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats almost to a parallel position and then jumps horizontally to the left."
  },
  "009359": {
    "text": "a person with both feet on the ground with both knees bended moving from one side to another, trying to hide or something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.435169621598612,
      "Minus Multimodal Distance": -10.243725776672363,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4444663469912484e-05,
      "MoBERT-F": 0.5628572861591256,
      "MoBERT-N": 0.5521549587911752,
      "MoBERT-min(F/N)": 0.5521549587911752,
      "MoBERT-max(F/N)": 0.5628572861591256,
      "MotionCritic": -6.24128532409668,
      "VeMo (human-opt view)": 0.6370597243491577,
      "VeMo (max entropy view)": 0.6074498567335244,
      "VeMo (min entropy view)": 0.6370597243491577,
      "VeMo (random view)": 0.6074498567335244,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person with both feet on the ground and both knees bent is moving from one side to another, trying to hide or something."
  },
  "009363": {
    "text": "someone is pulling back, while standing on one foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2758364939833653,
      "Minus Multimodal Distance": -6.934173107147217,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7553578547667712e-05,
      "MoBERT-F": 0.37620749102818685,
      "MoBERT-N": 0.5521455397069533,
      "MoBERT-min(F/N)": 0.37620749102818685,
      "MoBERT-max(F/N)": 0.5521455397069533,
      "MotionCritic": -10.067660331726074,
      "VeMo (human-opt view)": 0.7769230769230769,
      "VeMo (max entropy view)": 0.7769230769230769,
      "VeMo (min entropy view)": 0.80859375,
      "VeMo (random view)": 0.7769230769230769,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone is pulling back while standing on one foot."
  },
  "009373": {
    "text": "a figure appears to climb stairs",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0784901161892784,
      "Minus Multimodal Distance": -4.886283874511719,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.446738835715223e-05,
      "MoBERT-F": 0.41209915928628127,
      "MoBERT-N": 0.5058212724252179,
      "MoBERT-min(F/N)": 0.41209915928628127,
      "MoBERT-max(F/N)": 0.5058212724252179,
      "MotionCritic": -6.696577548980713,
      "VeMo (human-opt view)": 0.0013276201622087573,
      "VeMo (max entropy view)": 0.0013276201622087573,
      "VeMo (min entropy view)": 0.0002783638659496444,
      "VeMo (random view)": 0.0002783638659496444,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure appears to be climbing stairs."
  },
  "009377": {
    "text": "the person is walking forward and turn around like a monster",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8265423385439947,
      "Minus Multimodal Distance": -10.501106262207031,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.001027527847327292,
      "MoBERT-F": 0.5754253775174276,
      "MoBERT-N": 0.5202740623794464,
      "MoBERT-min(F/N)": 0.5202740623794464,
      "MoBERT-max(F/N)": 0.5754253775174276,
      "MotionCritic": -7.350699424743652,
      "VeMo (human-opt view)": 0.9048991354466859,
      "VeMo (max entropy view)": 0.835881753312946,
      "VeMo (min entropy view)": 0.9048991354466859,
      "VeMo (random view)": 0.835881753312946,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking forward and turning around like a monster."
  },
  "009383": {
    "text": "a person stayed on the place",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9961328238134447,
      "Minus Multimodal Distance": -2.159682035446167,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.774528522626497e-05,
      "MoBERT-F": 0.3588974840089501,
      "MoBERT-N": 0.4247058974321014,
      "MoBERT-min(F/N)": 0.3588974840089501,
      "MoBERT-max(F/N)": 0.4247058974321014,
      "MotionCritic": 0.21728095412254333,
      "VeMo (human-opt view)": 0.0011013562415431574,
      "VeMo (max entropy view)": 0.002632820465994004,
      "VeMo (min entropy view)": 0.0011013562415431574,
      "VeMo (random view)": 0.0011013562415431574,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stayed at the place."
  },
  "009385": {
    "text": "person squats all the way to the ground, then leaps up all the way, raising both hands above the head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.136465105241884,
      "Minus Multimodal Distance": -8.818803787231445,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00044543304829858243,
      "MoBERT-F": 0.4376742976533198,
      "MoBERT-N": 0.4199404769383871,
      "MoBERT-min(F/N)": 0.4199404769383871,
      "MoBERT-max(F/N)": 0.4376742976533198,
      "MotionCritic": -9.297491073608398,
      "VeMo (human-opt view)": 0.18248175182481752,
      "VeMo (max entropy view)": 0.23404255319148937,
      "VeMo (min entropy view)": 0.18248175182481752,
      "VeMo (random view)": 0.23404255319148937,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats all the way to the ground, then leaps up all the way, raising both hands above the head."
  },
  "009401": {
    "text": "the person is doing a pectoral warm up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7189133577367867,
      "Minus Multimodal Distance": -12.44228458404541,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0030481552239507437,
      "MoBERT-F": 0.551431209581102,
      "MoBERT-N": 0.4641739687270152,
      "MoBERT-min(F/N)": 0.4641739687270152,
      "MoBERT-max(F/N)": 0.551431209581102,
      "MotionCritic": -12.578967094421387,
      "VeMo (human-opt view)": 0.7665615141955836,
      "VeMo (max entropy view)": 0.7665615141955836,
      "VeMo (min entropy view)": 0.7769347496206374,
      "VeMo (random view)": 0.7665615141955836,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is doing a pectoral warm - up."
  },
  "009405": {
    "text": "a person walks forward and bends down and grabs his left knee in pain. he attempts to straighten up and walk forward and then bends down to grab his knee again. he then walks backward to his left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9079884429033466,
      "Minus Multimodal Distance": -2.234006404876709,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.2643129088683054e-05,
      "MoBERT-F": 0.4416812294851154,
      "MoBERT-N": 0.5130541041028465,
      "MoBERT-min(F/N)": 0.4416812294851154,
      "MoBERT-max(F/N)": 0.5130541041028465,
      "MotionCritic": -1.7396385669708252,
      "VeMo (human-opt view)": 0.8813114754098361,
      "VeMo (max entropy view)": 0.8667496886674969,
      "VeMo (min entropy view)": 0.8813114754098361,
      "VeMo (random view)": 0.8813114754098361,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, bends down, and grabs his left knee in pain. He attempts to straighten up, walk forward, and then bends down to grab his knee again. Then, he walks backward to his left."
  },
  "009443": {
    "text": "a person attempts to get a rock out of their shoe.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5547708712057775,
      "Minus Multimodal Distance": -8.238039016723633,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.000614410440903157,
      "MoBERT-F": 0.5470535098175255,
      "MoBERT-N": 0.5521365378397092,
      "MoBERT-min(F/N)": 0.5470535098175255,
      "MoBERT-max(F/N)": 0.5521365378397092,
      "MotionCritic": -6.099955081939697,
      "VeMo (human-opt view)": 0.6371191135734072,
      "VeMo (max entropy view)": 0.4696969696969697,
      "VeMo (min entropy view)": 0.6371191135734072,
      "VeMo (random view)": 0.4696969696969697,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person attempts to get a rock out of their shoe."
  },
  "009449": {
    "text": "the person brings their hands up in front of them as if opening something, and then ends in a t-pose.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.47016404191434114,
      "Minus Multimodal Distance": -11.68447494506836,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3841061192797497e-05,
      "MoBERT-F": 0.4193023915738484,
      "MoBERT-N": 0.43238139462061065,
      "MoBERT-min(F/N)": 0.4193023915738484,
      "MoBERT-max(F/N)": 0.43238139462061065,
      "MotionCritic": -14.66226863861084,
      "VeMo (human-opt view)": 0.8438228438228438,
      "VeMo (max entropy view)": 0.7658767772511849,
      "VeMo (min entropy view)": 0.8438228438228438,
      "VeMo (random view)": 0.7658767772511849,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person brings their hands up in front of them as if opening something, and then ends in a T - pose."
  },
  "009485": {
    "text": "a figure raises a weight in front of themself.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7879428554785128,
      "Minus Multimodal Distance": -7.086852073669434,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0003248363209422678,
      "MoBERT-F": 0.45090744661159254,
      "MoBERT-N": 0.48494059582150895,
      "MoBERT-min(F/N)": 0.45090744661159254,
      "MoBERT-max(F/N)": 0.48494059582150895,
      "MotionCritic": -6.3482184410095215,
      "VeMo (human-opt view)": 0.00019133813069732906,
      "VeMo (max entropy view)": 0.00019133813069732906,
      "VeMo (min entropy view)": 5.1275206071316545e-05,
      "VeMo (random view)": 0.00019133813069732906,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure raises a weight in front of themselves."
  },
  "009488": {
    "text": "a person walks towards the left making a wide 's' shape.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6498570163982673,
      "Minus Multimodal Distance": -6.493635177612305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3440357836079784e-05,
      "MoBERT-F": 0.5015052284183732,
      "MoBERT-N": 0.5726529902248402,
      "MoBERT-min(F/N)": 0.5015052284183732,
      "MoBERT-max(F/N)": 0.5726529902248402,
      "MotionCritic": -3.5698864459991455,
      "VeMo (human-opt view)": 0.8077858880778589,
      "VeMo (max entropy view)": 0.8077858880778589,
      "VeMo (min entropy view)": 0.8266883645240033,
      "VeMo (random view)": 0.8077858880778589,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks towards the left, making a wide 'S' shape."
  },
  "009493": {
    "text": "a person staggers around drunk and repeatedly gestures down with their right hand, as if mumbling an imaginary argument to themselves.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.399412500498512,
      "Minus Multimodal Distance": -11.015580177307129,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6773876015795395e-05,
      "MoBERT-F": 0.41917342795772533,
      "MoBERT-N": 0.5196299400907959,
      "MoBERT-min(F/N)": 0.41917342795772533,
      "MoBERT-max(F/N)": 0.5196299400907959,
      "MotionCritic": -5.501306533813477,
      "VeMo (human-opt view)": 0.5927342256214149,
      "VeMo (max entropy view)": 0.5927342256214149,
      "VeMo (min entropy view)": 0.39245283018867927,
      "VeMo (random view)": 0.39245283018867927,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person staggers around drunk and repeatedly gestures downward with their right hand, as if mumbling about an imaginary argument to themselves."
  },
  "009511": {
    "text": "figure does a quick small jump and then walks fowardsd and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7792183738044006,
      "Minus Multimodal Distance": -14.520458221435547,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.650615394115448,
      "MoBERT-F": 0.6115553572364377,
      "MoBERT-N": 0.6224447691496738,
      "MoBERT-min(F/N)": 0.6115553572364377,
      "MoBERT-max(F/N)": 0.6224447691496738,
      "MotionCritic": -12.100358963012695,
      "VeMo (human-opt view)": 0.5626598465473146,
      "VeMo (max entropy view)": 0.5626598465473146,
      "VeMo (min entropy view)": 0.7304347826086957,
      "VeMo (random view)": 0.7304347826086957,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure does a quick small jump, then walks forward and then stops."
  },
  "009517": {
    "text": "this person dribbles with his right hand and shoots.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3229020971629815,
      "Minus Multimodal Distance": -5.98348331451416,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00011696752335410565,
      "MoBERT-F": 0.4309738443699848,
      "MoBERT-N": 0.4537422727684889,
      "MoBERT-min(F/N)": 0.4309738443699848,
      "MoBERT-max(F/N)": 0.4537422727684889,
      "MotionCritic": -6.0748138427734375,
      "VeMo (human-opt view)": 0.0016936367647268121,
      "VeMo (max entropy view)": 0.05654671908073783,
      "VeMo (min entropy view)": 0.0016936367647268121,
      "VeMo (random view)": 0.05654671908073783,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person dribbles with his right hand and shoots."
  },
  "009519": {
    "text": "a person standing scratches his head with his right hand and then lowers his hand back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5032210380571212,
      "Minus Multimodal Distance": -10.991829872131348,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9611430363729596e-05,
      "MoBERT-F": 0.2931240886969195,
      "MoBERT-N": 0.4513952409502102,
      "MoBERT-min(F/N)": 0.2931240886969195,
      "MoBERT-max(F/N)": 0.4513952409502102,
      "MotionCritic": -16.06627082824707,
      "VeMo (human-opt view)": 0.9149922720247295,
      "VeMo (max entropy view)": 0.9149922720247295,
      "VeMo (min entropy view)": 0.9840999803703462,
      "VeMo (random view)": 0.9840999803703462,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing scratches his head with his right hand and then lowers his hand back down."
  },
  "009554": {
    "text": "a person stands still for a second and then takes a step back with their right foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3797343743156423,
      "Minus Multimodal Distance": -2.969869613647461,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2483887732960284e-05,
      "MoBERT-F": 0.3474710081700213,
      "MoBERT-N": 0.5386538363370472,
      "MoBERT-min(F/N)": 0.3474710081700213,
      "MoBERT-max(F/N)": 0.5386538363370472,
      "MotionCritic": -6.515047550201416,
      "VeMo (human-opt view)": 0.9193934557063048,
      "VeMo (max entropy view)": 0.7979539641943734,
      "VeMo (min entropy view)": 0.9193934557063048,
      "VeMo (random view)": 0.7979539641943734,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands still for a second and then takes a step back with their right foot."
  },
  "009566": {
    "text": "a man swings a golf club and hits the ball",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9611077605211081,
      "Minus Multimodal Distance": -4.625444412231445,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 9.592269634595141e-05,
      "MoBERT-F": 0.573064730075218,
      "MoBERT-N": 0.5829468275633587,
      "MoBERT-min(F/N)": 0.573064730075218,
      "MoBERT-max(F/N)": 0.5829468275633587,
      "MotionCritic": -1.691616177558899,
      "VeMo (human-opt view)": 3.7652111493975054e-05,
      "VeMo (max entropy view)": 0.0001496391522900224,
      "VeMo (min entropy view)": 3.7652111493975054e-05,
      "VeMo (random view)": 0.0001496391522900224,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man swings a golf club and hits the ball."
  },
  "009577": {
    "text": "the sim appears to be adjusting a watch that is placed on their right wrist.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.47720444232349873,
      "Minus Multimodal Distance": -7.824472427368164,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5581632144167088e-05,
      "MoBERT-F": 0.3954234927339363,
      "MoBERT-N": 0.45483591998507467,
      "MoBERT-min(F/N)": 0.3954234927339363,
      "MoBERT-max(F/N)": 0.45483591998507467,
      "MotionCritic": -8.912740707397461,
      "VeMo (human-opt view)": 0.21203830369357046,
      "VeMo (max entropy view)": 0.21203830369357046,
      "VeMo (min entropy view)": 0.031032537145006582,
      "VeMo (random view)": 0.031032537145006582,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The sim appears to be adjusting a watch that is placed on their right wrist."
  },
  "009584": {
    "text": "a person is sneaking around",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.709590911642911,
      "Minus Multimodal Distance": -13.362223625183105,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0005744353984482586,
      "MoBERT-F": 0.588396714561787,
      "MoBERT-N": 0.5713225110311961,
      "MoBERT-min(F/N)": 0.5713225110311961,
      "MoBERT-max(F/N)": 0.588396714561787,
      "MotionCritic": -12.133538246154785,
      "VeMo (human-opt view)": 0.9286043298019346,
      "VeMo (max entropy view)": 0.9286043298019346,
      "VeMo (min entropy view)": 0.9398847104035136,
      "VeMo (random view)": 0.9286043298019346,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is sneaking around."
  },
  "009600": {
    "text": "a person ballet dances in a triangle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0127059504098912,
      "Minus Multimodal Distance": -4.447192668914795,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.842201840598136e-05,
      "MoBERT-F": 0.5498272687790604,
      "MoBERT-N": 0.5542675856097303,
      "MoBERT-min(F/N)": 0.5498272687790604,
      "MoBERT-max(F/N)": 0.5542675856097303,
      "MotionCritic": -7.348512649536133,
      "VeMo (human-opt view)": 4.129571374782894e-05,
      "VeMo (max entropy view)": 0.0001441104415367467,
      "VeMo (min entropy view)": 4.129571374782894e-05,
      "VeMo (random view)": 4.129571374782894e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person dances ballet in a triangle."
  },
  "009654": {
    "text": "someone waits a moment and jumps to the right",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0059547068201684,
      "Minus Multimodal Distance": -9.201370239257812,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.8930291339056566e-05,
      "MoBERT-F": 0.32166376043981065,
      "MoBERT-N": 0.45295654834136156,
      "MoBERT-min(F/N)": 0.32166376043981065,
      "MoBERT-max(F/N)": 0.45295654834136156,
      "MotionCritic": -3.541624069213867,
      "VeMo (human-opt view)": 0.17376133984647593,
      "VeMo (max entropy view)": 0.7436743674367436,
      "VeMo (min entropy view)": 0.17376133984647593,
      "VeMo (random view)": 0.7436743674367436,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone waits for a moment and jumps to the right."
  },
  "009666": {
    "text": "the person is sweeping the floor.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4688158875259605,
      "Minus Multimodal Distance": -3.686077117919922,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.004655726719647646,
      "MoBERT-F": 0.6807022137845149,
      "MoBERT-N": 0.4577217568644113,
      "MoBERT-min(F/N)": 0.4577217568644113,
      "MoBERT-max(F/N)": 0.6807022137845149,
      "MotionCritic": -7.956643581390381,
      "VeMo (human-opt view)": 0.13974231912784935,
      "VeMo (max entropy view)": 0.3075356415478615,
      "VeMo (min entropy view)": 0.13974231912784935,
      "VeMo (random view)": 0.3075356415478615,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is sweeping the floor."
  },
  "009671": {
    "text": "a figure sprints forward confidently",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6751837095544577,
      "Minus Multimodal Distance": -10.030252456665039,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2764104869565926e-05,
      "MoBERT-F": 0.4611341803485026,
      "MoBERT-N": 0.5802994650419271,
      "MoBERT-min(F/N)": 0.4611341803485026,
      "MoBERT-max(F/N)": 0.5802994650419271,
      "MotionCritic": -4.894161701202393,
      "VeMo (human-opt view)": 0.11234396671289876,
      "VeMo (max entropy view)": 0.11234396671289876,
      "VeMo (min entropy view)": 0.05337194869673149,
      "VeMo (random view)": 0.11234396671289876,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure sprints forward confidently."
  },
  "009673": {
    "text": "a person throws their right hand up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9668784749317708,
      "Minus Multimodal Distance": -3.9591829776763916,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.7134227972710505e-05,
      "MoBERT-F": 0.43645963548059313,
      "MoBERT-N": 0.5150624876318994,
      "MoBERT-min(F/N)": 0.43645963548059313,
      "MoBERT-max(F/N)": 0.5150624876318994,
      "MotionCritic": -6.718337059020996,
      "VeMo (human-opt view)": 0.9830481496219657,
      "VeMo (max entropy view)": 0.9431992631255757,
      "VeMo (min entropy view)": 0.9830481496219657,
      "VeMo (random view)": 0.9830481496219657,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws their right hand up."
  },
  "009689": {
    "text": "a person walks forwards, sits.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0216439073361607,
      "Minus Multimodal Distance": -14.780840873718262,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7203665013075806e-05,
      "MoBERT-F": 0.3765898974003824,
      "MoBERT-N": 0.38925691380030225,
      "MoBERT-min(F/N)": 0.3765898974003824,
      "MoBERT-max(F/N)": 0.38925691380030225,
      "MotionCritic": -4.4550700187683105,
      "VeMo (human-opt view)": 0.9603624009060022,
      "VeMo (max entropy view)": 0.9362488450877733,
      "VeMo (min entropy view)": 0.9603624009060022,
      "VeMo (random view)": 0.9603624009060022,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and sits."
  },
  "009709": {
    "text": "the man in a fighting stance turns around to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0230957574241395,
      "Minus Multimodal Distance": -5.889774322509766,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7742575184674934e-05,
      "MoBERT-F": 0.3592171670209625,
      "MoBERT-N": 0.5277576110238158,
      "MoBERT-min(F/N)": 0.3592171670209625,
      "MoBERT-max(F/N)": 0.5277576110238158,
      "MotionCritic": -1.4590433835983276,
      "VeMo (human-opt view)": 0.5311203319502075,
      "VeMo (max entropy view)": 0.5311203319502075,
      "VeMo (min entropy view)": 0.7547169811320755,
      "VeMo (random view)": 0.5311203319502075,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man in a fighting stance turns to the right."
  },
  "009712": {
    "text": "a person is performing a ballet dance.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7667752080509054,
      "Minus Multimodal Distance": -11.474586486816406,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4023373043746687e-05,
      "MoBERT-F": 0.5887032811047862,
      "MoBERT-N": 0.5997001683109966,
      "MoBERT-min(F/N)": 0.5887032811047862,
      "MoBERT-max(F/N)": 0.5997001683109966,
      "MotionCritic": -4.018509387969971,
      "VeMo (human-opt view)": 0.00031463647724766595,
      "VeMo (max entropy view)": 0.00031463647724766595,
      "VeMo (min entropy view)": 0.0002459726705442051,
      "VeMo (random view)": 0.0002459726705442051,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is performing a ballet dance."
  },
  "009714": {
    "text": "a person makes a \"fast break\" motion, zig-zagging briefly from left to right then darting forward to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.0155738243656174,
      "Minus Multimodal Distance": -10.056920051574707,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.32452043890953064,
      "MoBERT-F": 0.6721291559088356,
      "MoBERT-N": 0.7546466799288872,
      "MoBERT-min(F/N)": 0.6721291559088356,
      "MoBERT-max(F/N)": 0.7546466799288872,
      "MotionCritic": -13.369322776794434,
      "VeMo (human-opt view)": 0.2815198618307427,
      "VeMo (max entropy view)": 0.3630831643002028,
      "VeMo (min entropy view)": 0.2815198618307427,
      "VeMo (random view)": 0.2815198618307427,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person makes a \"fast break\" motion, zig - zagging briefly from left to right, then darting forward to the right."
  },
  "009716": {
    "text": "the person is doing basketball signals.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.64358306460404,
      "Minus Multimodal Distance": -6.088932514190674,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.20487627387046814,
      "MoBERT-F": 0.628516239490859,
      "MoBERT-N": 0.6616595509792275,
      "MoBERT-min(F/N)": 0.628516239490859,
      "MoBERT-max(F/N)": 0.6616595509792275,
      "MotionCritic": -5.812757968902588,
      "VeMo (human-opt view)": 0.7551401869158878,
      "VeMo (max entropy view)": 0.7551401869158878,
      "VeMo (min entropy view)": 0.8175182481751825,
      "VeMo (random view)": 0.8175182481751825,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is making basketball signals."
  },
  "009730": {
    "text": "person is acting like a human monkey.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.173083322968206,
      "Minus Multimodal Distance": -6.442965507507324,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.435844701016322e-05,
      "MoBERT-F": 0.5434699559907362,
      "MoBERT-N": 0.5937112200764929,
      "MoBERT-min(F/N)": 0.5434699559907362,
      "MoBERT-max(F/N)": 0.5937112200764929,
      "MotionCritic": -6.310973167419434,
      "VeMo (human-opt view)": 0.9147005444646098,
      "VeMo (max entropy view)": 0.9100817438692098,
      "VeMo (min entropy view)": 0.9147005444646098,
      "VeMo (random view)": 0.9100817438692098,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is acting like a human monkey."
  },
  "009749": {
    "text": "the person is doing something at the counter and moving it to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8600697581505012,
      "Minus Multimodal Distance": -6.126203536987305,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0006415515090338886,
      "MoBERT-F": 0.5773558257358847,
      "MoBERT-N": 0.5086582239996862,
      "MoBERT-min(F/N)": 0.5086582239996862,
      "MoBERT-max(F/N)": 0.5773558257358847,
      "MotionCritic": -3.237635612487793,
      "VeMo (human-opt view)": 4.827921051648735e-05,
      "VeMo (max entropy view)": 0.001324371200744751,
      "VeMo (min entropy view)": 4.827921051648735e-05,
      "VeMo (random view)": 0.001324371200744751,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is doing something at the counter and moving it to the right."
  },
  "009762": {
    "text": "a person throws a shotput.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4691073478257273,
      "Minus Multimodal Distance": -9.257741928100586,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7269481420516968,
      "MoBERT-F": 0.6617399457510611,
      "MoBERT-N": 0.5509069765570562,
      "MoBERT-min(F/N)": 0.5509069765570562,
      "MoBERT-max(F/N)": 0.6617399457510611,
      "MotionCritic": -6.892773151397705,
      "VeMo (human-opt view)": 0.7435508345978755,
      "VeMo (max entropy view)": 0.7435508345978755,
      "VeMo (min entropy view)": 0.8269525267993875,
      "VeMo (random view)": 0.8269525267993875,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws a shot - put."
  },
  "009768": {
    "text": "a person side steps to the right and then to the left",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9447544790055618,
      "Minus Multimodal Distance": -6.283377170562744,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 9.842745203059167e-05,
      "MoBERT-F": 0.5529967668849093,
      "MoBERT-N": 0.7089892336863038,
      "MoBERT-min(F/N)": 0.5529967668849093,
      "MoBERT-max(F/N)": 0.7089892336863038,
      "MotionCritic": -5.441507816314697,
      "VeMo (human-opt view)": 0.967032967032967,
      "VeMo (max entropy view)": 0.9433478595231992,
      "VeMo (min entropy view)": 0.967032967032967,
      "VeMo (random view)": 0.967032967032967,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person side - steps to the right and then to the left."
  },
  "009773": {
    "text": "a man swings his arms to pick up a bucket and pour it out.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9254940194689885,
      "Minus Multimodal Distance": -10.157167434692383,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00044647298636846244,
      "MoBERT-F": 0.4818157308294637,
      "MoBERT-N": 0.5254598409166801,
      "MoBERT-min(F/N)": 0.4818157308294637,
      "MoBERT-max(F/N)": 0.5254598409166801,
      "MotionCritic": -1.3184503316879272,
      "VeMo (human-opt view)": 8.244420294122371e-05,
      "VeMo (max entropy view)": 8.244420294122371e-05,
      "VeMo (min entropy view)": 6.801648131256368e-05,
      "VeMo (random view)": 6.801648131256368e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man swings his arms to pick up a bucket and pour its contents out."
  },
  "009776": {
    "text": "a figure does a standing sprint",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.496167829018638,
      "Minus Multimodal Distance": -10.62504768371582,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3296261133509688e-05,
      "MoBERT-F": 0.413661275333828,
      "MoBERT-N": 0.5951903813379713,
      "MoBERT-min(F/N)": 0.413661275333828,
      "MoBERT-max(F/N)": 0.5951903813379713,
      "MotionCritic": -9.401348114013672,
      "VeMo (human-opt view)": 0.9808142255498362,
      "VeMo (max entropy view)": 0.97417479049085,
      "VeMo (min entropy view)": 0.9808142255498362,
      "VeMo (random view)": 0.9808142255498362,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure does a standing sprint."
  },
  "009777": {
    "text": "a person lifts their right forearm upwards towards their mouth and then puts it down again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5584739271315308,
      "Minus Multimodal Distance": -3.0430917739868164,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3105405489332043e-05,
      "MoBERT-F": 0.32972379020030274,
      "MoBERT-N": 0.4783550514498043,
      "MoBERT-min(F/N)": 0.32972379020030274,
      "MoBERT-max(F/N)": 0.4783550514498043,
      "MotionCritic": -11.04446029663086,
      "VeMo (human-opt view)": 0.9902897292176521,
      "VeMo (max entropy view)": 0.9553596815467729,
      "VeMo (min entropy view)": 0.9902897292176521,
      "VeMo (random view)": 0.9553596815467729,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts their right forearm upward toward their mouth and then puts it down again."
  },
  "009816": {
    "text": "a person slowly jumped forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7208318666607482,
      "Minus Multimodal Distance": -7.739704608917236,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.80445009493269e-05,
      "MoBERT-F": 0.6208511311513972,
      "MoBERT-N": 0.643195396020168,
      "MoBERT-min(F/N)": 0.6208511311513972,
      "MoBERT-max(F/N)": 0.643195396020168,
      "MotionCritic": -6.945403575897217,
      "VeMo (human-opt view)": 0.7773972602739726,
      "VeMo (max entropy view)": 0.693446088794926,
      "VeMo (min entropy view)": 0.7773972602739726,
      "VeMo (random view)": 0.693446088794926,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumped forward slowly."
  },
  "009831": {
    "text": "a person stiffly walks forward and backwards.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5158607923267169,
      "Minus Multimodal Distance": -6.366385459899902,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.658749771304429e-05,
      "MoBERT-F": 0.5446629127356772,
      "MoBERT-N": 0.6072611815031699,
      "MoBERT-min(F/N)": 0.5446629127356772,
      "MoBERT-max(F/N)": 0.6072611815031699,
      "MotionCritic": -9.552135467529297,
      "VeMo (human-opt view)": 0.8993357179356157,
      "VeMo (max entropy view)": 0.8810226155358899,
      "VeMo (min entropy view)": 0.8993357179356157,
      "VeMo (random view)": 0.8810226155358899,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks stiffly forwards and backwards."
  },
  "009867": {
    "text": "bending down and moving hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8155694987203496,
      "Minus Multimodal Distance": -3.666300058364868,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.3107975125312805,
      "MoBERT-F": 0.5836705354155884,
      "MoBERT-N": 0.5523400632648392,
      "MoBERT-min(F/N)": 0.5523400632648392,
      "MoBERT-max(F/N)": 0.5836705354155884,
      "MotionCritic": -8.222200393676758,
      "VeMo (human-opt view)": 0.9706591070163005,
      "VeMo (max entropy view)": 0.9706591070163005,
      "VeMo (min entropy view)": 0.9784329784329784,
      "VeMo (random view)": 0.9784329784329784,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is bending down and moving hands."
  },
  "009871": {
    "text": "a man side-skips from left to right repeatedly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4188809858524623,
      "Minus Multimodal Distance": -4.8858466148376465,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8334216475486755,
      "MoBERT-F": 0.796074917804038,
      "MoBERT-N": 0.6760861038937119,
      "MoBERT-min(F/N)": 0.6760861038937119,
      "MoBERT-max(F/N)": 0.796074917804038,
      "MotionCritic": -5.474212169647217,
      "VeMo (human-opt view)": 0.955329500221141,
      "VeMo (max entropy view)": 0.9324991768192296,
      "VeMo (min entropy view)": 0.955329500221141,
      "VeMo (random view)": 0.955329500221141,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man side - skips from left to right repeatedly."
  },
  "009880": {
    "text": "a man beginning with his right leg slides to the right side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.419823177641433,
      "Minus Multimodal Distance": -7.761607646942139,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9026805350440554e-05,
      "MoBERT-F": 0.36949735664636973,
      "MoBERT-N": 0.518038570173569,
      "MoBERT-min(F/N)": 0.36949735664636973,
      "MoBERT-max(F/N)": 0.518038570173569,
      "MotionCritic": -7.210906505584717,
      "VeMo (human-opt view)": 0.7431693989071039,
      "VeMo (max entropy view)": 0.6792452830188679,
      "VeMo (min entropy view)": 0.7431693989071039,
      "VeMo (random view)": 0.7431693989071039,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man begins with his right leg and slides to the right side."
  },
  "009890": {
    "text": "moving hands to knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6629089557914754,
      "Minus Multimodal Distance": -10.720755577087402,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.96131256618537e-05,
      "MoBERT-F": 0.4474373490878689,
      "MoBERT-N": 0.4970599387282239,
      "MoBERT-min(F/N)": 0.4474373490878689,
      "MoBERT-max(F/N)": 0.4970599387282239,
      "MotionCritic": -15.258193016052246,
      "VeMo (human-opt view)": 0.3768545994065282,
      "VeMo (max entropy view)": 0.3768545994065282,
      "VeMo (min entropy view)": 0.23389830508474577,
      "VeMo (random view)": 0.3768545994065282,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves hands to knees."
  },
  "009903": {
    "text": "a person begins to walk forward up the stairs",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5828837086495866,
      "Minus Multimodal Distance": -5.225630283355713,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.04530183004681e-05,
      "MoBERT-F": 0.44631680279499797,
      "MoBERT-N": 0.6380646970341127,
      "MoBERT-min(F/N)": 0.44631680279499797,
      "MoBERT-max(F/N)": 0.6380646970341127,
      "MotionCritic": -5.007461071014404,
      "VeMo (human-opt view)": 0.2823086574654956,
      "VeMo (max entropy view)": 0.2823086574654956,
      "VeMo (min entropy view)": 0.14792899408284024,
      "VeMo (random view)": 0.2823086574654956,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person begins to walk forward up the stairs."
  },
  "009905": {
    "text": "a person sits cross legged then stands up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3249110790052816,
      "Minus Multimodal Distance": -12.211959838867188,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.107200245722197e-05,
      "MoBERT-F": 0.35542834957949343,
      "MoBERT-N": 0.36797550084484654,
      "MoBERT-min(F/N)": 0.35542834957949343,
      "MoBERT-max(F/N)": 0.36797550084484654,
      "MotionCritic": -5.472204208374023,
      "VeMo (human-opt view)": 0.9152542372881356,
      "VeMo (max entropy view)": 0.8872987477638641,
      "VeMo (min entropy view)": 0.9152542372881356,
      "VeMo (random view)": 0.9152542372881356,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits cross - legged and then stands up."
  },
  "009908": {
    "text": "moving forward on the floor.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7635113821152107,
      "Minus Multimodal Distance": -11.502739906311035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3218966816784814e-05,
      "MoBERT-F": 0.5142984002814902,
      "MoBERT-N": 0.5801824802031454,
      "MoBERT-min(F/N)": 0.5142984002814902,
      "MoBERT-max(F/N)": 0.5801824802031454,
      "MotionCritic": -8.639972686767578,
      "VeMo (human-opt view)": 0.6654478976234004,
      "VeMo (max entropy view)": 0.6654478976234004,
      "VeMo (min entropy view)": 0.7876447876447876,
      "VeMo (random view)": 0.6654478976234004,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving forward on the floor."
  },
  "009910": {
    "text": "the person stand and touch the head.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6688370325092924,
      "Minus Multimodal Distance": -8.859742164611816,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.10681087523698807,
      "MoBERT-F": 0.4398929929908057,
      "MoBERT-N": 0.5127260914281377,
      "MoBERT-min(F/N)": 0.4398929929908057,
      "MoBERT-max(F/N)": 0.5127260914281377,
      "MotionCritic": -8.487625122070312,
      "VeMo (human-opt view)": 0.9902897292176521,
      "VeMo (max entropy view)": 0.9098862642169728,
      "VeMo (min entropy view)": 0.9902897292176521,
      "VeMo (random view)": 0.9902897292176521,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person stands and touches the head."
  },
  "009919": {
    "text": "a person is raising their arms in the air with their legs bent as if lifting weights.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1326884439745417,
      "Minus Multimodal Distance": -9.109587669372559,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8397011192282662e-05,
      "MoBERT-F": 0.37865858579787603,
      "MoBERT-N": 0.4410209318987929,
      "MoBERT-min(F/N)": 0.37865858579787603,
      "MoBERT-max(F/N)": 0.4410209318987929,
      "MotionCritic": -3.240835666656494,
      "VeMo (human-opt view)": 0.16457369464639787,
      "VeMo (max entropy view)": 0.182548794489093,
      "VeMo (min entropy view)": 0.16457369464639787,
      "VeMo (random view)": 0.16457369464639787,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is raising their arms in the air with their legs bent, as if they are lifting weights."
  },
  "009924": {
    "text": "the  person is running over a vault.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -3.587571666824599,
      "Minus Multimodal Distance": -5.48227596282959,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.6344019210664555e-05,
      "MoBERT-F": 0.5317984594532825,
      "MoBERT-N": 0.5915673911646477,
      "MoBERT-min(F/N)": 0.5317984594532825,
      "MoBERT-max(F/N)": 0.5915673911646477,
      "MotionCritic": -8.883920669555664,
      "VeMo (human-opt view)": 0.39166666666666666,
      "VeMo (max entropy view)": 0.42327150084317033,
      "VeMo (min entropy view)": 0.39166666666666666,
      "VeMo (random view)": 0.39166666666666666,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is running over a vault."
  },
  "009926": {
    "text": "a person walks forward before bending down to hold his left knee with both hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.135982227556525,
      "Minus Multimodal Distance": -8.245577812194824,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2252421331359074e-05,
      "MoBERT-F": 0.4026131911113937,
      "MoBERT-N": 0.5217626222112142,
      "MoBERT-min(F/N)": 0.4026131911113937,
      "MoBERT-max(F/N)": 0.5217626222112142,
      "MotionCritic": -7.3169264793396,
      "VeMo (human-opt view)": 0.23377960865087538,
      "VeMo (max entropy view)": 0.23377960865087538,
      "VeMo (min entropy view)": 0.2120451693851945,
      "VeMo (random view)": 0.2120451693851945,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward before bending down to hold his left knee with both hands."
  },
  "009941": {
    "text": "excited person starts with arms out and bounces from one foot to the other while clapping.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7526608950337614,
      "Minus Multimodal Distance": -11.528789520263672,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4381599359912798e-05,
      "MoBERT-F": 0.44670535975223935,
      "MoBERT-N": 0.4831696207030458,
      "MoBERT-min(F/N)": 0.44670535975223935,
      "MoBERT-max(F/N)": 0.4831696207030458,
      "MotionCritic": -7.036092758178711,
      "VeMo (human-opt view)": 0.48484848484848486,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.48484848484848486,
      "VeMo (random view)": 0.5,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "An excited person starts with their arms out and bounces from one foot to the other while clapping."
  },
  "009942": {
    "text": "the person is pouring some thing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8738242599351114,
      "Minus Multimodal Distance": -10.178043365478516,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.344362292205915e-05,
      "MoBERT-F": 0.4615472645540087,
      "MoBERT-N": 0.4826246782656485,
      "MoBERT-min(F/N)": 0.4615472645540087,
      "MoBERT-max(F/N)": 0.4826246782656485,
      "MotionCritic": -4.945462226867676,
      "VeMo (human-opt view)": 0.0015977455432963882,
      "VeMo (max entropy view)": 0.0015977455432963882,
      "VeMo (min entropy view)": 0.0002039820037450179,
      "VeMo (random view)": 0.0015977455432963882,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is pouring something."
  },
  "009946": {
    "text": "a person does a swinging golf club motion",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7203776915619015,
      "Minus Multimodal Distance": -9.24777889251709,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.626603524433449e-05,
      "MoBERT-F": 0.5796586651093032,
      "MoBERT-N": 0.6342077134369647,
      "MoBERT-min(F/N)": 0.5796586651093032,
      "MoBERT-max(F/N)": 0.6342077134369647,
      "MotionCritic": -9.03231430053711,
      "VeMo (human-opt view)": 0.7060755336617406,
      "VeMo (max entropy view)": 0.3205944798301486,
      "VeMo (min entropy view)": 0.7060755336617406,
      "VeMo (random view)": 0.3205944798301486,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person makes a swinging motion with a golf club."
  },
  "009954": {
    "text": "a person slowly walks forward with hands down at sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8489714663282438,
      "Minus Multimodal Distance": -10.359825134277344,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2503685613628477e-05,
      "MoBERT-F": 0.39310322713137036,
      "MoBERT-N": 0.5437456606191415,
      "MoBERT-min(F/N)": 0.39310322713137036,
      "MoBERT-max(F/N)": 0.5437456606191415,
      "MotionCritic": -6.613352298736572,
      "VeMo (human-opt view)": 0.9796230747153927,
      "VeMo (max entropy view)": 0.8523206751054853,
      "VeMo (min entropy view)": 0.9796230747153927,
      "VeMo (random view)": 0.9796230747153927,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly walks forward with hands down at the sides."
  },
  "009958": {
    "text": "a person walks backward in a counterclockwise circle",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.733988197674743,
      "Minus Multimodal Distance": -11.50588321685791,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8689651489257812,
      "MoBERT-F": 0.5372852630916287,
      "MoBERT-N": 0.527848205606704,
      "MoBERT-min(F/N)": 0.527848205606704,
      "MoBERT-max(F/N)": 0.5372852630916287,
      "MotionCritic": -14.732662200927734,
      "VeMo (human-opt view)": 0.8589473684210527,
      "VeMo (max entropy view)": 0.562390158172232,
      "VeMo (min entropy view)": 0.8589473684210527,
      "VeMo (random view)": 0.8589473684210527,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks backward in a counter - clockwise circle."
  },
  "009961": {
    "text": "a person is practicing karate moves across the floor",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.9559627062600726,
      "Minus Multimodal Distance": -6.125181198120117,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0001268535852432251,
      "MoBERT-F": 0.577532445260929,
      "MoBERT-N": 0.5306808814422657,
      "MoBERT-min(F/N)": 0.5306808814422657,
      "MoBERT-max(F/N)": 0.577532445260929,
      "MotionCritic": -2.232161045074463,
      "VeMo (human-opt view)": 0.8989784335981839,
      "VeMo (max entropy view)": 0.8352668213457076,
      "VeMo (min entropy view)": 0.8989784335981839,
      "VeMo (random view)": 0.8352668213457076,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is practicing karate moves across the floor."
  },
  "009968": {
    "text": "the man puts the box down and runs",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.752353929712881,
      "Minus Multimodal Distance": -10.407583236694336,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.433875826885924e-05,
      "MoBERT-F": 0.36500981999905774,
      "MoBERT-N": 0.4121310661051699,
      "MoBERT-min(F/N)": 0.36500981999905774,
      "MoBERT-max(F/N)": 0.4121310661051699,
      "MotionCritic": -14.37301254272461,
      "VeMo (human-opt view)": 1.2206882240207028e-05,
      "VeMo (max entropy view)": 2.7572139290164644e-05,
      "VeMo (min entropy view)": 1.2206882240207028e-05,
      "VeMo (random view)": 2.7572139290164644e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man puts the box down and runs."
  },
  "009972": {
    "text": "a person is doing jumping jacks.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9923334415320523,
      "Minus Multimodal Distance": -5.6832709312438965,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3671420422033407e-05,
      "MoBERT-F": 0.499776712037574,
      "MoBERT-N": 0.5272075086078898,
      "MoBERT-min(F/N)": 0.499776712037574,
      "MoBERT-max(F/N)": 0.5272075086078898,
      "MotionCritic": -7.370703220367432,
      "VeMo (human-opt view)": 0.8082901554404145,
      "VeMo (max entropy view)": 0.8077601410934744,
      "VeMo (min entropy view)": 0.8082901554404145,
      "VeMo (random view)": 0.8082901554404145,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is doing jumping jacks."
  },
  "009977": {
    "text": "person swings right arm as if pitching a ball to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2431465394673336,
      "Minus Multimodal Distance": -3.6837170124053955,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.006689475849270821,
      "MoBERT-F": 0.5594733342034224,
      "MoBERT-N": 0.5073837625804275,
      "MoBERT-min(F/N)": 0.5073837625804275,
      "MoBERT-max(F/N)": 0.5594733342034224,
      "MotionCritic": -10.443907737731934,
      "VeMo (human-opt view)": 0.5154394299287411,
      "VeMo (max entropy view)": 0.5154394299287411,
      "VeMo (min entropy view)": 0.7304015296367112,
      "VeMo (random view)": 0.7304015296367112,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person swings their right arm as if pitching a ball to the left."
  },
  "009986": {
    "text": "walking forward making a 90 degree turn to the left at a gradual angle",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2423813249520135,
      "Minus Multimodal Distance": -4.858946323394775,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.938750185421668e-05,
      "MoBERT-F": 0.4555130152961112,
      "MoBERT-N": 0.6141884434399838,
      "MoBERT-min(F/N)": 0.4555130152961112,
      "MoBERT-max(F/N)": 0.6141884434399838,
      "MotionCritic": -7.2015485763549805,
      "VeMo (human-opt view)": 0.9434482758620689,
      "VeMo (max entropy view)": 0.9284750337381916,
      "VeMo (min entropy view)": 0.9434482758620689,
      "VeMo (random view)": 0.9434482758620689,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking forward, making a 90 - degree turn to the left at a gradual angle."
  },
  "010007": {
    "text": "the person is playing peekaboo.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8613844013348819,
      "Minus Multimodal Distance": -7.436435699462891,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00011487376468721777,
      "MoBERT-F": 0.5344814477735114,
      "MoBERT-N": 0.5445423627306509,
      "MoBERT-min(F/N)": 0.5344814477735114,
      "MoBERT-max(F/N)": 0.5445423627306509,
      "MotionCritic": -13.441888809204102,
      "VeMo (human-opt view)": 0.0029774991519780896,
      "VeMo (max entropy view)": 0.025983667409057165,
      "VeMo (min entropy view)": 0.0029774991519780896,
      "VeMo (random view)": 0.025983667409057165,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is playing peek - a - boo."
  },
  "010026": {
    "text": "the person tries to stable themselves standing still but takes a step forward after losing balance",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7121828374826632,
      "Minus Multimodal Distance": -7.1845173835754395,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4567858417867683e-05,
      "MoBERT-F": 0.36762787914473105,
      "MoBERT-N": 0.5542000836531588,
      "MoBERT-min(F/N)": 0.36762787914473105,
      "MoBERT-max(F/N)": 0.5542000836531588,
      "MotionCritic": -3.2404768466949463,
      "VeMo (human-opt view)": 0.9466972711043101,
      "VeMo (max entropy view)": 0.887055183084064,
      "VeMo (min entropy view)": 0.9466972711043101,
      "VeMo (random view)": 0.9466972711043101,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person tries to steady themselves while standing still but takes a step forward after losing balance."
  },
  "010032": {
    "text": "a person walks forward and jumps over an object, then turns around to jump over it again and walk back.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.048026857833719,
      "Minus Multimodal Distance": -8.050795555114746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0003379903791937977,
      "MoBERT-F": 0.5425230828961745,
      "MoBERT-N": 0.6660065687104966,
      "MoBERT-min(F/N)": 0.5425230828961745,
      "MoBERT-max(F/N)": 0.6660065687104966,
      "MotionCritic": -15.6815767288208,
      "VeMo (human-opt view)": 0.004066312167657179,
      "VeMo (max entropy view)": 0.004066312167657179,
      "VeMo (min entropy view)": 0.0004880429477794046,
      "VeMo (random view)": 0.0004880429477794046,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and jumps over an object. Then, they turn around to jump over it again and walk back."
  },
  "010037": {
    "text": "a right handed golfer takes a golf swing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8266240594163199,
      "Minus Multimodal Distance": -4.2585906982421875,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4491106159985065e-05,
      "MoBERT-F": 0.4032684282918638,
      "MoBERT-N": 0.5318267827160146,
      "MoBERT-min(F/N)": 0.4032684282918638,
      "MoBERT-max(F/N)": 0.5318267827160146,
      "MotionCritic": -7.03153657913208,
      "VeMo (human-opt view)": 0.05020153902528399,
      "VeMo (max entropy view)": 0.05020153902528399,
      "VeMo (min entropy view)": 0.03520978283786633,
      "VeMo (random view)": 0.05020153902528399,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A right-handed golfer takes a golf swing."
  },
  "010039": {
    "text": "person walks forwards straight while stumbling",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1736382347442438,
      "Minus Multimodal Distance": -3.020051956176758,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9483579993247986,
      "MoBERT-F": 0.8787105038061291,
      "MoBERT-N": 0.9365618831228266,
      "MoBERT-min(F/N)": 0.8787105038061291,
      "MoBERT-max(F/N)": 0.9365618831228266,
      "MotionCritic": -8.390617370605469,
      "VeMo (human-opt view)": 0.9840955586352538,
      "VeMo (max entropy view)": 0.9433478595231992,
      "VeMo (min entropy view)": 0.9840955586352538,
      "VeMo (random view)": 0.9433478595231992,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward straight while stumbling."
  },
  "010043": {
    "text": "the person is doing the cha-cha.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8505308813294484,
      "Minus Multimodal Distance": -3.309844970703125,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.771880099317059e-05,
      "MoBERT-F": 0.5351730618098658,
      "MoBERT-N": 0.5682732879985128,
      "MoBERT-min(F/N)": 0.5351730618098658,
      "MoBERT-max(F/N)": 0.5682732879985128,
      "MotionCritic": -4.799482822418213,
      "VeMo (human-opt view)": 0.36338028169014086,
      "VeMo (max entropy view)": 0.36338028169014086,
      "VeMo (min entropy view)": 0.6510067114093959,
      "VeMo (random view)": 0.6510067114093959,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is doing the cha-cha."
  },
  "010068": {
    "text": "a person appears to be hitting a ball with their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.087454756381158,
      "Minus Multimodal Distance": -8.901825904846191,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6740501198219135e-05,
      "MoBERT-F": 0.46386066534879855,
      "MoBERT-N": 0.4306037225314566,
      "MoBERT-min(F/N)": 0.4306037225314566,
      "MoBERT-max(F/N)": 0.46386066534879855,
      "MotionCritic": -8.126420021057129,
      "VeMo (human-opt view)": 0.00010867014516126493,
      "VeMo (max entropy view)": 0.0006663121975514709,
      "VeMo (min entropy view)": 0.00010867014516126493,
      "VeMo (random view)": 0.00010867014516126493,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person appears to be hitting a ball with their right hand."
  },
  "010072": {
    "text": "a figure walks down and right to stare at a wall",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.35206165422304,
      "Minus Multimodal Distance": -8.201848983764648,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8177183866500854,
      "MoBERT-F": 0.645455308273712,
      "MoBERT-N": 0.7594489866150438,
      "MoBERT-min(F/N)": 0.645455308273712,
      "MoBERT-max(F/N)": 0.7594489866150438,
      "MotionCritic": -11.795164108276367,
      "VeMo (human-opt view)": 0.743801652892562,
      "VeMo (max entropy view)": 0.743801652892562,
      "VeMo (min entropy view)": 0.8599605522682445,
      "VeMo (random view)": 0.743801652892562,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure walks down and to the right to stare at a wall."
  },
  "010083": {
    "text": "a person is standing and moves arms in a way that looks like they are picking something up and taking a drink or a bite",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5265132521952995,
      "Minus Multimodal Distance": -6.749706745147705,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.731648318236694e-05,
      "MoBERT-F": 0.3013757397779084,
      "MoBERT-N": 0.4241918952188472,
      "MoBERT-min(F/N)": 0.3013757397779084,
      "MoBERT-max(F/N)": 0.4241918952188472,
      "MotionCritic": -7.895585060119629,
      "VeMo (human-opt view)": 1.7839559504078464e-05,
      "VeMo (max entropy view)": 4.2813873516886426e-05,
      "VeMo (min entropy view)": 1.7839559504078464e-05,
      "VeMo (random view)": 1.7839559504078464e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing and moves their arms in a way that looks like they are picking something up and taking a drink or a bite."
  },
  "010145": {
    "text": "a person is pushed hard to their left and they recover into a standing position.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9093429110595305,
      "Minus Multimodal Distance": -8.748037338256836,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2450247342931107e-05,
      "MoBERT-F": 0.3633946524045508,
      "MoBERT-N": 0.47757496249692294,
      "MoBERT-min(F/N)": 0.3633946524045508,
      "MoBERT-max(F/N)": 0.47757496249692294,
      "MotionCritic": -8.84563159942627,
      "VeMo (human-opt view)": 0.6781857451403888,
      "VeMo (max entropy view)": 0.6781857451403888,
      "VeMo (min entropy view)": 0.6927835051546392,
      "VeMo (random view)": 0.6927835051546392,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is pushed hard to their left and then recovers to a standing position."
  },
  "010183": {
    "text": "figure appears to be carrying an itemm (large item)",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2941042992430605,
      "Minus Multimodal Distance": -9.347975730895996,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.9544250941835344e-05,
      "MoBERT-F": 0.5176570632861792,
      "MoBERT-N": 0.5955803935214501,
      "MoBERT-min(F/N)": 0.5176570632861792,
      "MoBERT-max(F/N)": 0.5955803935214501,
      "MotionCritic": -3.1822516918182373,
      "VeMo (human-opt view)": 0.0006660421779223045,
      "VeMo (max entropy view)": 0.0006660421779223045,
      "VeMo (min entropy view)": 0.00010213003395200885,
      "VeMo (random view)": 0.00010213003395200885,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The figure appears to be carrying an item (a large item)."
  },
  "010195": {
    "text": "a person, slowly walked forward, and after made circle with right hand",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7525680397841923,
      "Minus Multimodal Distance": -8.885485649108887,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.2912711203098297,
      "MoBERT-F": 0.6765354178093144,
      "MoBERT-N": 0.6587472035478076,
      "MoBERT-min(F/N)": 0.6587472035478076,
      "MoBERT-max(F/N)": 0.6765354178093144,
      "MotionCritic": -8.4241943359375,
      "VeMo (human-opt view)": 0.48484848484848486,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.48484848484848486,
      "VeMo (random view)": 0.5,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person slowly walked forward and then made a circle with the right hand."
  },
  "010199": {
    "text": "a figure wind mill kicks around the mosh pit.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6971382390697427,
      "Minus Multimodal Distance": -8.903656959533691,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9647599149029702e-05,
      "MoBERT-F": 0.43903373287883807,
      "MoBERT-N": 0.5165942096298448,
      "MoBERT-min(F/N)": 0.43903373287883807,
      "MoBERT-max(F/N)": 0.5165942096298448,
      "MotionCritic": -5.853565692901611,
      "VeMo (human-opt view)": 0.5770750988142292,
      "VeMo (max entropy view)": 0.5770750988142292,
      "VeMo (min entropy view)": 0.71939736346516,
      "VeMo (random view)": 0.71939736346516,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure does windmill kicks around the mosh pit."
  },
  "010223": {
    "text": "a man turns to his left and brings his hands up to touch in front of his chest as he kicks something with his left leg.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0195503229993332,
      "Minus Multimodal Distance": -2.915344715118408,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00013938755728304386,
      "MoBERT-F": 0.5464404168827335,
      "MoBERT-N": 0.5152514250561139,
      "MoBERT-min(F/N)": 0.5152514250561139,
      "MoBERT-max(F/N)": 0.5464404168827335,
      "MotionCritic": -3.2801904678344727,
      "VeMo (human-opt view)": 0.866965620328849,
      "VeMo (max entropy view)": 0.5770392749244713,
      "VeMo (min entropy view)": 0.866965620328849,
      "VeMo (random view)": 0.5770392749244713,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man turns to his left and brings his hands up to touch each other in front of his chest as he kicks something with his left leg."
  },
  "010248": {
    "text": "a person climbs up some ladders",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.9536575232225146,
      "Minus Multimodal Distance": -10.573453903198242,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8802066445350647,
      "MoBERT-F": 0.8095952188613196,
      "MoBERT-N": 0.6261714314415608,
      "MoBERT-min(F/N)": 0.6261714314415608,
      "MoBERT-max(F/N)": 0.8095952188613196,
      "MotionCritic": -12.993982315063477,
      "VeMo (human-opt view)": 5.262841678075083e-06,
      "VeMo (max entropy view)": 8.954419714853531e-06,
      "VeMo (min entropy view)": 5.262841678075083e-06,
      "VeMo (random view)": 5.262841678075083e-06,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person climbs up some ladders."
  },
  "010254": {
    "text": "he balances on his right foot and similarly flaps his arms to that of a large bird taking off",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.25662230487482,
      "Minus Multimodal Distance": -10.945305824279785,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.319258601346519e-05,
      "MoBERT-F": 0.37715491684389524,
      "MoBERT-N": 0.5181288849434595,
      "MoBERT-min(F/N)": 0.37715491684389524,
      "MoBERT-max(F/N)": 0.5181288849434595,
      "MotionCritic": -5.879867076873779,
      "VeMo (human-opt view)": 0.0013278266682539537,
      "VeMo (max entropy view)": 0.0013278266682539537,
      "VeMo (min entropy view)": 0.0010991426687183997,
      "VeMo (random view)": 0.0013278266682539537,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "He balances on his right foot and flaps his arms in a way similar to that of a large bird taking off."
  },
  "010270": {
    "text": "a person is jogging on a treadmill",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6313719832882805,
      "Minus Multimodal Distance": -8.04090690612793,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0008077037055045366,
      "MoBERT-F": 0.6785046415599556,
      "MoBERT-N": 0.6612148323672775,
      "MoBERT-min(F/N)": 0.6612148323672775,
      "MoBERT-max(F/N)": 0.6785046415599556,
      "MotionCritic": -7.248055934906006,
      "VeMo (human-opt view)": 0.8174904942965779,
      "VeMo (max entropy view)": 0.6357615894039735,
      "VeMo (min entropy view)": 0.8174904942965779,
      "VeMo (random view)": 0.8174904942965779,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is jogging on a treadmill."
  },
  "010280": {
    "text": "a man scratches his head with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.535759749097938,
      "Minus Multimodal Distance": -6.466460704803467,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.1195147787220776e-05,
      "MoBERT-F": 0.2937368819890283,
      "MoBERT-N": 0.39188800452586536,
      "MoBERT-min(F/N)": 0.2937368819890283,
      "MoBERT-max(F/N)": 0.39188800452586536,
      "MotionCritic": -9.08282470703125,
      "VeMo (human-opt view)": 0.6654991243432574,
      "VeMo (max entropy view)": 0.6654991243432574,
      "VeMo (min entropy view)": 0.97556434721899,
      "VeMo (random view)": 0.6654991243432574,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man scratches his head with his right hand."
  },
  "010282": {
    "text": "a perso bend the knee and leand to the floor and start walked",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5654809524422248,
      "Minus Multimodal Distance": -7.300114631652832,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2352513042278588e-05,
      "MoBERT-F": 0.3810406156651986,
      "MoBERT-N": 0.4428216976781271,
      "MoBERT-min(F/N)": 0.3810406156651986,
      "MoBERT-max(F/N)": 0.4428216976781271,
      "MotionCritic": -6.996652126312256,
      "VeMo (human-opt view)": 0.8264182895850973,
      "VeMo (max entropy view)": 0.8264182895850973,
      "VeMo (min entropy view)": 0.8441432720232332,
      "VeMo (random view)": 0.8441432720232332,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends the knee, leans to the floor, and starts walking."
  },
  "010305": {
    "text": "both arms raise to the right, hit throw",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1755553825425813,
      "Minus Multimodal Distance": -5.6312665939331055,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5874933004379272,
      "MoBERT-F": 0.7630163713066261,
      "MoBERT-N": 0.7321476535352538,
      "MoBERT-min(F/N)": 0.7321476535352538,
      "MoBERT-max(F/N)": 0.7630163713066261,
      "MotionCritic": -7.313513278961182,
      "VeMo (human-opt view)": 0.23439667128987518,
      "VeMo (max entropy view)": 0.5782608695652174,
      "VeMo (min entropy view)": 0.23439667128987518,
      "VeMo (random view)": 0.23439667128987518,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person raises both arms to the right, then hits and throws something."
  },
  "010315": {
    "text": "the person is walking forward with the cake.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.02308591954813,
      "Minus Multimodal Distance": -15.194268226623535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.19954535365104675,
      "MoBERT-F": 0.7640609249040407,
      "MoBERT-N": 0.6224858263541118,
      "MoBERT-min(F/N)": 0.6224858263541118,
      "MoBERT-max(F/N)": 0.7640609249040407,
      "MotionCritic": -2.019216299057007,
      "VeMo (human-opt view)": 0.7304015296367112,
      "VeMo (max entropy view)": 0.6081081081081081,
      "VeMo (min entropy view)": 0.7304015296367112,
      "VeMo (random view)": 0.7304015296367112,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking forward with the cake."
  },
  "010384": {
    "text": "a person walks forward and picks things up and puts them down with their hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.508932609972495,
      "Minus Multimodal Distance": -11.03159236907959,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001182414562208578,
      "MoBERT-F": 0.5006135447003197,
      "MoBERT-N": 0.575064129651978,
      "MoBERT-min(F/N)": 0.5006135447003197,
      "MoBERT-max(F/N)": 0.575064129651978,
      "MotionCritic": -12.295539855957031,
      "VeMo (human-opt view)": 0.5920245398773006,
      "VeMo (max entropy view)": 0.5920245398773006,
      "VeMo (min entropy view)": 0.7188755020080321,
      "VeMo (random view)": 0.5920245398773006,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, picks things up, and puts them down with their hands."
  },
  "010392": {
    "text": "a person sits their motion stutters",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4853542476172399,
      "Minus Multimodal Distance": -6.132328033447266,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3327918825089e-05,
      "MoBERT-F": 0.3806581668923637,
      "MoBERT-N": 0.36652958815844017,
      "MoBERT-min(F/N)": 0.36652958815844017,
      "MoBERT-max(F/N)": 0.3806581668923637,
      "MotionCritic": -8.085565567016602,
      "VeMo (human-opt view)": 0.9281284202845677,
      "VeMo (max entropy view)": 0.859538784067086,
      "VeMo (min entropy view)": 0.9281284202845677,
      "VeMo (random view)": 0.9281284202845677,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits there, while their motion stutters."
  },
  "010409": {
    "text": "the person moves backwards as if pushed by someone in front of them.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8720790607650373,
      "Minus Multimodal Distance": -8.782906532287598,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.996005117893219,
      "MoBERT-F": 0.6661434396352865,
      "MoBERT-N": 0.5542436175947967,
      "MoBERT-min(F/N)": 0.5542436175947967,
      "MoBERT-max(F/N)": 0.6661434396352865,
      "MotionCritic": -8.860292434692383,
      "VeMo (human-opt view)": 0.6374695863746959,
      "VeMo (max entropy view)": 0.5918854415274463,
      "VeMo (min entropy view)": 0.6374695863746959,
      "VeMo (random view)": 0.6374695863746959,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person moves backward as if pushed by someone in front of them."
  },
  "010441": {
    "text": "the person takes a swing with the baseball bat",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.215035162639395,
      "Minus Multimodal Distance": -2.6821177005767822,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0016979607753455639,
      "MoBERT-F": 0.6067926057006403,
      "MoBERT-N": 0.5232877661352904,
      "MoBERT-min(F/N)": 0.5232877661352904,
      "MoBERT-max(F/N)": 0.6067926057006403,
      "MotionCritic": -7.824126720428467,
      "VeMo (human-opt view)": 1.7839559504078464e-05,
      "VeMo (max entropy view)": 2.6629279417530192e-05,
      "VeMo (min entropy view)": 1.7839559504078464e-05,
      "VeMo (random view)": 1.7839559504078464e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person takes a swing with the baseball bat."
  },
  "010443": {
    "text": "a person walks in a clockwise circle swaying one arm and keeping the other arm still",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7607628487046583,
      "Minus Multimodal Distance": -11.970388412475586,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.711741217353847e-05,
      "MoBERT-F": 0.38023836609302925,
      "MoBERT-N": 0.41952406894255867,
      "MoBERT-min(F/N)": 0.38023836609302925,
      "MoBERT-max(F/N)": 0.41952406894255867,
      "MotionCritic": -13.285612106323242,
      "VeMo (human-opt view)": 0.8589473684210527,
      "VeMo (max entropy view)": 0.8589473684210527,
      "VeMo (min entropy view)": 0.8597748208802457,
      "VeMo (random view)": 0.8597748208802457,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in a clockwise circle, swaying one arm and keeping the other arm still."
  },
  "010496": {
    "text": "a figure walks towards a wall and stares, confined.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6894604929814256,
      "Minus Multimodal Distance": -13.795105934143066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.3536995033500716e-05,
      "MoBERT-F": 0.49793882011411017,
      "MoBERT-N": 0.5346772002871922,
      "MoBERT-min(F/N)": 0.49793882011411017,
      "MoBERT-max(F/N)": 0.5346772002871922,
      "MotionCritic": -9.614211082458496,
      "VeMo (human-opt view)": 0.8602150537634409,
      "VeMo (max entropy view)": 0.7312883435582822,
      "VeMo (min entropy view)": 0.8602150537634409,
      "VeMo (random view)": 0.8602150537634409,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure walks towards a wall and stares, confined."
  },
  "010499": {
    "text": "this person steps forward then sits on a stool to brush off his shoe.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0968488093449595,
      "Minus Multimodal Distance": -7.896205902099609,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.583299283287488e-05,
      "MoBERT-F": 0.4780044341516738,
      "MoBERT-N": 0.5789568555423015,
      "MoBERT-min(F/N)": 0.4780044341516738,
      "MoBERT-max(F/N)": 0.5789568555423015,
      "MotionCritic": -8.114616394042969,
      "VeMo (human-opt view)": 4.528836736213142e-05,
      "VeMo (max entropy view)": 0.00045945982014340486,
      "VeMo (min entropy view)": 4.528836736213142e-05,
      "VeMo (random view)": 0.00045945982014340486,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person steps forward and then sits on a stool to brush off his shoes."
  },
  "010520": {
    "text": "a person strafes to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7793289113298798,
      "Minus Multimodal Distance": -15.148996353149414,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00011499926040414721,
      "MoBERT-F": 0.6053408087854684,
      "MoBERT-N": 0.6599092986601033,
      "MoBERT-min(F/N)": 0.6053408087854684,
      "MoBERT-max(F/N)": 0.6599092986601033,
      "MotionCritic": -10.930450439453125,
      "VeMo (human-opt view)": 0.9147788565264293,
      "VeMo (max entropy view)": 0.8989784335981839,
      "VeMo (min entropy view)": 0.9147788565264293,
      "VeMo (random view)": 0.9147788565264293,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves sideways to the right."
  },
  "010526": {
    "text": "a person makes a toast, then raises a glass to take a drink and wipes his lips with a napkin.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7868031522544607,
      "Minus Multimodal Distance": -10.634475708007812,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.727326475200243e-05,
      "MoBERT-F": 0.4848924784204302,
      "MoBERT-N": 0.5316224969888249,
      "MoBERT-min(F/N)": 0.4848924784204302,
      "MoBERT-max(F/N)": 0.5316224969888249,
      "MotionCritic": -5.399367332458496,
      "VeMo (human-opt view)": 8.182182258773234e-06,
      "VeMo (max entropy view)": 8.379756559316982e-06,
      "VeMo (min entropy view)": 8.182182258773234e-06,
      "VeMo (random view)": 8.182182258773234e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person makes a toast, then raises a glass to take a drink and wipes their lips with a napkin."
  },
  "010529": {
    "text": "person is walking forward while leaning on something with the left arm, then they stop and bring their arms out to the side",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8591703272595849,
      "Minus Multimodal Distance": -9.321677207946777,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00014130851195659488,
      "MoBERT-F": 0.6119726824093338,
      "MoBERT-N": 0.5906678383714662,
      "MoBERT-min(F/N)": 0.5906678383714662,
      "MoBERT-max(F/N)": 0.6119726824093338,
      "MotionCritic": -8.903630256652832,
      "VeMo (human-opt view)": 0.5467289719626168,
      "VeMo (max entropy view)": 0.5467289719626168,
      "VeMo (min entropy view)": 0.6656298600311042,
      "VeMo (random view)": 0.5467289719626168,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking forward while leaning on something with their left arm. Then, they stop and extend their arms out to the sides."
  },
  "010547": {
    "text": "the person is walking to the left like a robot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5858935360738813,
      "Minus Multimodal Distance": -11.060426712036133,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.13660410046577454,
      "MoBERT-F": 0.570431710045726,
      "MoBERT-N": 0.4427908569375084,
      "MoBERT-min(F/N)": 0.4427908569375084,
      "MoBERT-max(F/N)": 0.570431710045726,
      "MotionCritic": -2.8432235717773438,
      "VeMo (human-opt view)": 0.607940446650124,
      "VeMo (max entropy view)": 0.5628415300546448,
      "VeMo (min entropy view)": 0.607940446650124,
      "VeMo (random view)": 0.607940446650124,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking to the left like a robot."
  },
  "010553": {
    "text": "a person is walking slowly in zigzag",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.281487491946136,
      "Minus Multimodal Distance": -12.943209648132324,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.346240944461897e-05,
      "MoBERT-F": 0.40295642754702454,
      "MoBERT-N": 0.4790704991677247,
      "MoBERT-min(F/N)": 0.40295642754702454,
      "MoBERT-max(F/N)": 0.4790704991677247,
      "MotionCritic": -9.520651817321777,
      "VeMo (human-opt view)": 0.718336483931947,
      "VeMo (max entropy view)": 0.6508620689655172,
      "VeMo (min entropy view)": 0.718336483931947,
      "VeMo (random view)": 0.718336483931947,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking slowly in a zigzag."
  },
  "010557": {
    "text": "person puts hands on head then chest then knees then toes",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7709206148444113,
      "Minus Multimodal Distance": -3.511418342590332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.437253533571493e-05,
      "MoBERT-F": 0.46744283329280145,
      "MoBERT-N": 0.5384861154696552,
      "MoBERT-min(F/N)": 0.46744283329280145,
      "MoBERT-max(F/N)": 0.5384861154696552,
      "MotionCritic": -5.439794063568115,
      "VeMo (human-opt view)": 0.6220095693779905,
      "VeMo (max entropy view)": 0.4375,
      "VeMo (min entropy view)": 0.6220095693779905,
      "VeMo (random view)": 0.6220095693779905,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person puts their hands on their head, then on their chest, then on their knees, and then on their toes."
  },
  "010563": {
    "text": "a person lifts their left arm up, twitches a bit, then brings their right arm up while putting their left arm down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8155525015176525,
      "Minus Multimodal Distance": -7.631012439727783,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.636820136103779e-05,
      "MoBERT-F": 0.44397764341192947,
      "MoBERT-N": 0.4640594458865249,
      "MoBERT-min(F/N)": 0.44397764341192947,
      "MoBERT-max(F/N)": 0.4640594458865249,
      "MotionCritic": -7.413143634796143,
      "VeMo (human-opt view)": 0.2691029900332226,
      "VeMo (max entropy view)": 0.3626707132018209,
      "VeMo (min entropy view)": 0.2691029900332226,
      "VeMo (random view)": 0.3626707132018209,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts their left arm up, twitches a bit, then brings their right arm up while putting their left arm down."
  },
  "010586": {
    "text": "a person carefully sits down on the ground and crosses their legs.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6423015374684564,
      "Minus Multimodal Distance": -7.560608386993408,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.286420567543246e-05,
      "MoBERT-F": 0.3751471919206143,
      "MoBERT-N": 0.4794138605435663,
      "MoBERT-min(F/N)": 0.3751471919206143,
      "MoBERT-max(F/N)": 0.4794138605435663,
      "MotionCritic": -10.904342651367188,
      "VeMo (human-opt view)": 5.26023908549579e-06,
      "VeMo (max entropy view)": 8.679743077393509e-06,
      "VeMo (min entropy view)": 5.26023908549579e-06,
      "VeMo (random view)": 5.26023908549579e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person carefully sits down on the ground and crosses their legs."
  },
  "010600": {
    "text": "the man take 4 excited steps forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0940608515598242,
      "Minus Multimodal Distance": -5.449416160583496,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.251352169783786e-05,
      "MoBERT-F": 0.3540217382272139,
      "MoBERT-N": 0.45974797354067914,
      "MoBERT-min(F/N)": 0.3540217382272139,
      "MoBERT-max(F/N)": 0.45974797354067914,
      "MotionCritic": -7.261916637420654,
      "VeMo (human-opt view)": 0.00017408548354135288,
      "VeMo (max entropy view)": 0.00017408548354135288,
      "VeMo (min entropy view)": 7.234542850608748e-05,
      "VeMo (random view)": 7.234542850608748e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man takes 4 excited steps forward."
  },
  "010617": {
    "text": "a person walks forward then climbs up something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0185185969272088,
      "Minus Multimodal Distance": -3.82104229927063,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4908013074309565e-05,
      "MoBERT-F": 0.4294252366833976,
      "MoBERT-N": 0.5756583366102145,
      "MoBERT-min(F/N)": 0.4294252366833976,
      "MoBERT-max(F/N)": 0.5756583366102145,
      "MotionCritic": -3.4350903034210205,
      "VeMo (human-opt view)": 0.010377293656793348,
      "VeMo (max entropy view)": 0.031115366203925323,
      "VeMo (min entropy view)": 0.010377293656793348,
      "VeMo (random view)": 0.010377293656793348,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and then climbs up something."
  },
  "010618": {
    "text": "person is throwing a ball hard.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9095268202945564,
      "Minus Multimodal Distance": -7.475742816925049,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8398529887199402,
      "MoBERT-F": 0.723311790568173,
      "MoBERT-N": 0.626992902734024,
      "MoBERT-min(F/N)": 0.626992902734024,
      "MoBERT-max(F/N)": 0.723311790568173,
      "MotionCritic": -9.44803237915039,
      "VeMo (human-opt view)": 0.0001354173348443057,
      "VeMo (max entropy view)": 0.0003362584200193077,
      "VeMo (min entropy view)": 0.0001354173348443057,
      "VeMo (random view)": 0.0003362584200193077,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is throwing a ball hard."
  },
  "010648": {
    "text": "a man steps forward, then suddenly staggers to his left, before returning to his original course.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1621877106089509,
      "Minus Multimodal Distance": -8.41185188293457,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0025630593299865723,
      "MoBERT-F": 0.4845770143729481,
      "MoBERT-N": 0.579195628608682,
      "MoBERT-min(F/N)": 0.4845770143729481,
      "MoBERT-max(F/N)": 0.579195628608682,
      "MotionCritic": -3.7362608909606934,
      "VeMo (human-opt view)": 0.9784172661870504,
      "VeMo (max entropy view)": 0.9756525553849529,
      "VeMo (min entropy view)": 0.9784172661870504,
      "VeMo (random view)": 0.9756525553849529,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man steps forward, then suddenly staggers to his left before returning to his original course."
  },
  "010651": {
    "text": "the person ran forward to kick a soccer ball.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.837833285058641,
      "Minus Multimodal Distance": -5.82747745513916,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.311287789780181e-05,
      "MoBERT-F": 0.4972295345806191,
      "MoBERT-N": 0.603757434514854,
      "MoBERT-min(F/N)": 0.4972295345806191,
      "MoBERT-max(F/N)": 0.603757434514854,
      "MotionCritic": -8.385194778442383,
      "VeMo (human-opt view)": 0.09032258064516129,
      "VeMo (max entropy view)": 0.25688073394495414,
      "VeMo (min entropy view)": 0.09032258064516129,
      "VeMo (random view)": 0.09032258064516129,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person ran forward to kick a soccer ball."
  },
  "010658": {
    "text": "a person walking like a bird and then sniffing the air.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7469888301882939,
      "Minus Multimodal Distance": -10.478060722351074,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9054597020149231,
      "MoBERT-F": 0.6696565787793671,
      "MoBERT-N": 0.7006850505425208,
      "MoBERT-min(F/N)": 0.6696565787793671,
      "MoBERT-max(F/N)": 0.7006850505425208,
      "MotionCritic": -13.044361114501953,
      "VeMo (human-opt view)": 0.2222222222222222,
      "VeMo (max entropy view)": 0.22245322245322247,
      "VeMo (min entropy view)": 0.2222222222222222,
      "VeMo (random view)": 0.2222222222222222,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking like a bird and then sniffing the air."
  },
  "010671": {
    "text": "a person lunges forward with one foot, reaching his arms out at the same time; this happens twice.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3363017823042398,
      "Minus Multimodal Distance": -8.43272876739502,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005512585630640388,
      "MoBERT-F": 0.6481201473473778,
      "MoBERT-N": 0.5733426111154247,
      "MoBERT-min(F/N)": 0.5733426111154247,
      "MoBERT-max(F/N)": 0.6481201473473778,
      "MotionCritic": -11.775400161743164,
      "VeMo (human-opt view)": 0.6235632183908046,
      "VeMo (max entropy view)": 0.6235632183908046,
      "VeMo (min entropy view)": 0.6666666666666666,
      "VeMo (random view)": 0.6235632183908046,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lunges forward with one foot while reaching their arms out simultaneously; this occurs twice."
  },
  "010703": {
    "text": "hands are on the knees, hands go to the chest in a scratching motion, and then hands go back down to the knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2273650898414057,
      "Minus Multimodal Distance": -10.08348274230957,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3613971279701218e-05,
      "MoBERT-F": 0.33488498377965087,
      "MoBERT-N": 0.41644536872403215,
      "MoBERT-min(F/N)": 0.33488498377965087,
      "MoBERT-max(F/N)": 0.41644536872403215,
      "MotionCritic": 2.4517691135406494,
      "VeMo (human-opt view)": 0.9830278343516633,
      "VeMo (max entropy view)": 0.8740894901144641,
      "VeMo (min entropy view)": 0.9830278343516633,
      "VeMo (random view)": 0.9830278343516633,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person's hands are on the knees, hands go to the chest in a scratching motion, and then hands go back down to the knees."
  },
  "010706": {
    "text": "a man throws an object with his right hand while lifting his right leg off the ground.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3015345598396402,
      "Minus Multimodal Distance": -6.806447982788086,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8929141990374774e-05,
      "MoBERT-F": 0.3106369018923303,
      "MoBERT-N": 0.4322247444932301,
      "MoBERT-min(F/N)": 0.3106369018923303,
      "MoBERT-max(F/N)": 0.4322247444932301,
      "MotionCritic": -14.199128150939941,
      "VeMo (human-opt view)": 0.5150905432595574,
      "VeMo (max entropy view)": 0.5150905432595574,
      "VeMo (min entropy view)": 0.6228287841191067,
      "VeMo (random view)": 0.6228287841191067,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man throws an object with his right hand while lifting his right leg off the ground."
  },
  "010752": {
    "text": "staying still then backing up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7400099586943418,
      "Minus Multimodal Distance": -8.867091178894043,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.08676563203334808,
      "MoBERT-F": 0.6246537687011516,
      "MoBERT-N": 0.5727501913843364,
      "MoBERT-min(F/N)": 0.5727501913843364,
      "MoBERT-max(F/N)": 0.6246537687011516,
      "MotionCritic": -8.280153274536133,
      "VeMo (human-opt view)": 0.562874251497006,
      "VeMo (max entropy view)": 0.562874251497006,
      "VeMo (min entropy view)": 0.7435064935064936,
      "VeMo (random view)": 0.7435064935064936,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stays still then backs up."
  },
  "010785": {
    "text": "a person marches forward, turns around, and then marches back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.521107068321774,
      "Minus Multimodal Distance": -12.224132537841797,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.328634819714352e-05,
      "MoBERT-F": 0.6104156073835191,
      "MoBERT-N": 0.649691093507396,
      "MoBERT-min(F/N)": 0.6104156073835191,
      "MoBERT-max(F/N)": 0.649691093507396,
      "MotionCritic": -11.337703704833984,
      "VeMo (human-opt view)": 0.9241379310344827,
      "VeMo (max entropy view)": 0.7306122448979592,
      "VeMo (min entropy view)": 0.9241379310344827,
      "VeMo (random view)": 0.7306122448979592,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person marches forward, turns around, and then marches back."
  },
  "010795": {
    "text": "a person starts a jogging on the place",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7465342824067772,
      "Minus Multimodal Distance": -6.855557441711426,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.5973338526673615e-05,
      "MoBERT-F": 0.4747412157887063,
      "MoBERT-N": 0.5645834881994213,
      "MoBERT-min(F/N)": 0.4747412157887063,
      "MoBERT-max(F/N)": 0.5645834881994213,
      "MotionCritic": -10.060958862304688,
      "VeMo (human-opt view)": 0.9668085106382979,
      "VeMo (max entropy view)": 0.7185840707964601,
      "VeMo (min entropy view)": 0.9668085106382979,
      "VeMo (random view)": 0.7185840707964601,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person starts jogging in the place."
  },
  "010797": {
    "text": "a person walks forward and side kicks then puts his hand to his face",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3838054629189636,
      "Minus Multimodal Distance": -13.981812477111816,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.09326896967832e-05,
      "MoBERT-F": 0.5008656969998646,
      "MoBERT-N": 0.6158077697264748,
      "MoBERT-min(F/N)": 0.5008656969998646,
      "MoBERT-max(F/N)": 0.6158077697264748,
      "MotionCritic": -8.841486930847168,
      "VeMo (human-opt view)": 0.031146685227074995,
      "VeMo (max entropy view)": 0.031146685227074995,
      "VeMo (min entropy view)": 0.001096538072656328,
      "VeMo (random view)": 0.031146685227074995,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, performs a side kick, and then puts his hand to his face."
  },
  "010810": {
    "text": "a person sitting on the floor scratches their head and then leans back on their left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4707142887041242,
      "Minus Multimodal Distance": -8.033982276916504,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0002928678586613387,
      "MoBERT-F": 0.48316172784129063,
      "MoBERT-N": 0.49176654287694094,
      "MoBERT-min(F/N)": 0.48316172784129063,
      "MoBERT-max(F/N)": 0.49176654287694094,
      "MotionCritic": -12.090899467468262,
      "VeMo (human-opt view)": 0.006695069993913573,
      "VeMo (max entropy view)": 0.025962399283795883,
      "VeMo (min entropy view)": 0.006695069993913573,
      "VeMo (random view)": 0.006695069993913573,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sitting on the floor scratches their head and then leans back on their left hand."
  },
  "010816": {
    "text": "person is walking while kicking out legs",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.239258833879206,
      "Minus Multimodal Distance": -7.293293476104736,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.23109333193861e-05,
      "MoBERT-F": 0.46646250977015247,
      "MoBERT-N": 0.6806829666299132,
      "MoBERT-min(F/N)": 0.46646250977015247,
      "MoBERT-max(F/N)": 0.6806829666299132,
      "MotionCritic": -7.1184539794921875,
      "VeMo (human-opt view)": 0.9649368863955119,
      "VeMo (max entropy view)": 0.9433478595231992,
      "VeMo (min entropy view)": 0.9649368863955119,
      "VeMo (random view)": 0.9433478595231992,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking while kicking their legs out."
  },
  "010828": {
    "text": "a person walks forward and stumbles a bit",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4373354074372964,
      "Minus Multimodal Distance": -12.155878067016602,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0009641939541324973,
      "MoBERT-F": 0.5721376839967455,
      "MoBERT-N": 0.6274499761490772,
      "MoBERT-min(F/N)": 0.5721376839967455,
      "MoBERT-max(F/N)": 0.6274499761490772,
      "MotionCritic": -7.47376012802124,
      "VeMo (human-opt view)": 0.9433290978398984,
      "VeMo (max entropy view)": 0.8934945308002303,
      "VeMo (min entropy view)": 0.9433290978398984,
      "VeMo (random view)": 0.8934945308002303,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and stumbles a little."
  },
  "010843": {
    "text": "a person holds their right arm out on something to support them while sticking their right leg up to balance.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7286932405670438,
      "Minus Multimodal Distance": -5.370971202850342,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2870030079502612e-05,
      "MoBERT-F": 0.4375924707640795,
      "MoBERT-N": 0.5111805672512668,
      "MoBERT-min(F/N)": 0.4375924707640795,
      "MoBERT-max(F/N)": 0.5111805672512668,
      "MotionCritic": -2.6460044384002686,
      "VeMo (human-opt view)": 0.8436724565756824,
      "VeMo (max entropy view)": 0.7980769230769231,
      "VeMo (min entropy view)": 0.8436724565756824,
      "VeMo (random view)": 0.8436724565756824,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds their right arm out on something to support themselves while sticking their right leg up to balance."
  },
  "010850": {
    "text": "a figure waves with their right hand",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4146876263320898,
      "Minus Multimodal Distance": -8.412788391113281,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.137203359277919e-05,
      "MoBERT-F": 0.30062962270054294,
      "MoBERT-N": 0.35690474718700366,
      "MoBERT-min(F/N)": 0.30062962270054294,
      "MoBERT-max(F/N)": 0.35690474718700366,
      "MotionCritic": -13.325969696044922,
      "VeMo (human-opt view)": 0.7180451127819549,
      "VeMo (max entropy view)": 0.6797853309481217,
      "VeMo (min entropy view)": 0.7180451127819549,
      "VeMo (random view)": 0.6797853309481217,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure waves with their right hand."
  },
  "010874": {
    "text": "a man brings his hands down to his thighs, standing with his knees bent, before extending his arms to either side and then bringing them back down to his thighs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6064264040510519,
      "Minus Multimodal Distance": -3.3891775608062744,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6426376280141994e-05,
      "MoBERT-F": 0.37000315672237977,
      "MoBERT-N": 0.4353863118755643,
      "MoBERT-min(F/N)": 0.37000315672237977,
      "MoBERT-max(F/N)": 0.4353863118755643,
      "MotionCritic": -12.751910209655762,
      "VeMo (human-opt view)": 0.9850317124735729,
      "VeMo (max entropy view)": 0.8441145281018028,
      "VeMo (min entropy view)": 0.9850317124735729,
      "VeMo (random view)": 0.8441145281018028,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man brings his hands down to his thighs, standing with his knees bent, before extending his arms to either side and then bringing them back down to his thighs."
  },
  "010897": {
    "text": "a person puts their hands on their hips and turns to the left side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5249569314521714,
      "Minus Multimodal Distance": -9.246923446655273,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.32420167978853e-05,
      "MoBERT-F": 0.35126076929563244,
      "MoBERT-N": 0.3873737490037661,
      "MoBERT-min(F/N)": 0.35126076929563244,
      "MoBERT-max(F/N)": 0.3873737490037661,
      "MotionCritic": -7.093045711517334,
      "VeMo (human-opt view)": 0.047312520115867396,
      "VeMo (max entropy view)": 0.2232606438213915,
      "VeMo (min entropy view)": 0.047312520115867396,
      "VeMo (random view)": 0.2232606438213915,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person puts their hands on their hips and turns to the left side."
  },
  "010898": {
    "text": "a person kicks something with their right foot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1227483101002518,
      "Minus Multimodal Distance": -6.663819313049316,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.743466317653656,
      "MoBERT-F": 0.7902567752081302,
      "MoBERT-N": 0.5858127950889906,
      "MoBERT-min(F/N)": 0.5858127950889906,
      "MoBERT-max(F/N)": 0.7902567752081302,
      "MotionCritic": -8.154052734375,
      "VeMo (human-opt view)": 0.9554090732842186,
      "VeMo (max entropy view)": 0.899135446685879,
      "VeMo (min entropy view)": 0.9554090732842186,
      "VeMo (random view)": 0.9554090732842186,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person kicks something with their right foot."
  },
  "010909": {
    "text": "a person walks forward and then walks up some steps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7179804258196355,
      "Minus Multimodal Distance": -6.255018711090088,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.416085019125603e-05,
      "MoBERT-F": 0.5022957652875082,
      "MoBERT-N": 0.6692442806080794,
      "MoBERT-min(F/N)": 0.5022957652875082,
      "MoBERT-max(F/N)": 0.6692442806080794,
      "MotionCritic": -3.2571046352386475,
      "VeMo (human-opt view)": 0.09011808576755749,
      "VeMo (max entropy view)": 0.25761772853185594,
      "VeMo (min entropy view)": 0.09011808576755749,
      "VeMo (random view)": 0.25761772853185594,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and then walks up some steps."
  },
  "010915": {
    "text": "a person enacting a mime with an invisible barrier in front of them touching with both hands",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.055790200094508,
      "Minus Multimodal Distance": -8.100828170776367,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.492889300105162e-05,
      "MoBERT-F": 0.35946769613484136,
      "MoBERT-N": 0.39323889668587264,
      "MoBERT-min(F/N)": 0.35946769613484136,
      "MoBERT-max(F/N)": 0.39323889668587264,
      "MotionCritic": -13.91451644897461,
      "VeMo (human-opt view)": 0.8990825688073395,
      "VeMo (max entropy view)": 0.8990825688073395,
      "VeMo (min entropy view)": 0.9435215946843853,
      "VeMo (random view)": 0.9435215946843853,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is enacting a mime, touching an invisible barrier in front of them with both hands."
  },
  "010917": {
    "text": "standing on one leg and swinging it.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.066566609375788,
      "Minus Multimodal Distance": -7.774165630340576,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.303495966771152e-05,
      "MoBERT-F": 0.47117850332486033,
      "MoBERT-N": 0.5562300767521782,
      "MoBERT-min(F/N)": 0.47117850332486033,
      "MoBERT-max(F/N)": 0.5562300767521782,
      "MotionCritic": -9.006365776062012,
      "VeMo (human-opt view)": 0.9466498103666245,
      "VeMo (max entropy view)": 0.9466498103666245,
      "VeMo (min entropy view)": 0.9688737973967176,
      "VeMo (random view)": 0.9466498103666245,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing on one leg and swinging it."
  },
  "010928": {
    "text": "this person kicks with his right foot while standing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.116846063675983,
      "Minus Multimodal Distance": -12.352490425109863,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.185358920949511e-05,
      "MoBERT-F": 0.49140368267964774,
      "MoBERT-N": 0.5922638907107243,
      "MoBERT-min(F/N)": 0.49140368267964774,
      "MoBERT-max(F/N)": 0.5922638907107243,
      "MotionCritic": -8.69754409790039,
      "VeMo (human-opt view)": 0.9326705829191143,
      "VeMo (max entropy view)": 0.9326705829191143,
      "VeMo (min entropy view)": 0.9498164014687882,
      "VeMo (random view)": 0.9498164014687882,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person kicks with his right foot while standing."
  },
  "010964": {
    "text": "a person lowers and walks on all fours to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.066864356162242,
      "Minus Multimodal Distance": -5.341848373413086,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.400335688435007e-05,
      "MoBERT-F": 0.40343007602049963,
      "MoBERT-N": 0.4515640939070542,
      "MoBERT-min(F/N)": 0.40343007602049963,
      "MoBERT-max(F/N)": 0.4515640939070542,
      "MotionCritic": -10.683697700500488,
      "VeMo (human-opt view)": 0.9151157512482978,
      "VeMo (max entropy view)": 0.8994413407821229,
      "VeMo (min entropy view)": 0.9151157512482978,
      "VeMo (random view)": 0.8994413407821229,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lowers their body down and walks on all fours to the right."
  },
  "010967": {
    "text": "a person getting shade from arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0457526399751096,
      "Minus Multimodal Distance": -11.267810821533203,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.714139554882422e-05,
      "MoBERT-F": 0.47635173059940195,
      "MoBERT-N": 0.5136400583346132,
      "MoBERT-min(F/N)": 0.47635173059940195,
      "MoBERT-max(F/N)": 0.5136400583346132,
      "MotionCritic": -13.395048141479492,
      "VeMo (human-opt view)": 0.5766871165644172,
      "VeMo (max entropy view)": 0.5766871165644172,
      "VeMo (min entropy view)": 0.3069679849340866,
      "VeMo (random view)": 0.3069679849340866,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is getting shade from their arm."
  },
  "010996": {
    "text": "a person stands on his left leg, swings his right leg out to the side, back, and then he returns to a two-legged standing position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9823346695344798,
      "Minus Multimodal Distance": -9.474886894226074,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3592361685587093e-05,
      "MoBERT-F": 0.45950101920217423,
      "MoBERT-N": 0.53192837885526,
      "MoBERT-min(F/N)": 0.45950101920217423,
      "MoBERT-max(F/N)": 0.53192837885526,
      "MotionCritic": -8.641319274902344,
      "VeMo (human-opt view)": 0.98414517669532,
      "VeMo (max entropy view)": 0.9819624279619789,
      "VeMo (min entropy view)": 0.98414517669532,
      "VeMo (random view)": 0.98414517669532,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands on their left leg, swings their right leg out to the side and then back, before returning to a two - legged standing position."
  },
  "011004": {
    "text": "a person picks something up in front of them moves it to the side then moves it back",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6379192172319507,
      "Minus Multimodal Distance": -9.533378601074219,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.210406794096343e-05,
      "MoBERT-F": 0.40740839752699354,
      "MoBERT-N": 0.4714526992814758,
      "MoBERT-min(F/N)": 0.40740839752699354,
      "MoBERT-max(F/N)": 0.4714526992814758,
      "MotionCritic": -3.067131519317627,
      "VeMo (human-opt view)": 0.718562874251497,
      "VeMo (max entropy view)": 0.40759493670886077,
      "VeMo (min entropy view)": 0.718562874251497,
      "VeMo (random view)": 0.40759493670886077,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person picks something up in front of them, moves it to the side, then moves it back."
  },
  "011035": {
    "text": "a person is walking then stops and sits down",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5703162516384455,
      "Minus Multimodal Distance": -9.3751859664917,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.563147609180305e-05,
      "MoBERT-F": 0.3893749851117696,
      "MoBERT-N": 0.41743448184590626,
      "MoBERT-min(F/N)": 0.3893749851117696,
      "MoBERT-max(F/N)": 0.41743448184590626,
      "MotionCritic": -6.326850891113281,
      "VeMo (human-opt view)": 0.9096744612563045,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.9096744612563045,
      "VeMo (random view)": 0.5,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking, then stops and sits down."
  },
  "011074": {
    "text": "a person walks in a counter clockwise circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2478361945434688,
      "Minus Multimodal Distance": -11.161450386047363,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3614647943759337e-05,
      "MoBERT-F": 0.3733173649007313,
      "MoBERT-N": 0.4536166263348946,
      "MoBERT-min(F/N)": 0.3733173649007313,
      "MoBERT-max(F/N)": 0.4536166263348946,
      "MotionCritic": -7.963563919067383,
      "VeMo (human-opt view)": 0.8517241379310345,
      "VeMo (max entropy view)": 0.8517241379310345,
      "VeMo (min entropy view)": 0.8674275680421423,
      "VeMo (random view)": 0.8517241379310345,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in a counter - clockwise circle."
  },
  "011075": {
    "text": "a man bends over and puts his hands on the ground and is on all fours",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8634781775470188,
      "Minus Multimodal Distance": -10.945305824279785,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7084870453109033e-05,
      "MoBERT-F": 0.36053973785522286,
      "MoBERT-N": 0.5025064643315595,
      "MoBERT-min(F/N)": 0.36053973785522286,
      "MoBERT-max(F/N)": 0.5025064643315595,
      "MotionCritic": -8.49851131439209,
      "VeMo (human-opt view)": 2.0560826014648987e-06,
      "VeMo (max entropy view)": 3.286866714651722e-06,
      "VeMo (min entropy view)": 2.0560826014648987e-06,
      "VeMo (random view)": 3.286866714651722e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man bends over, puts his hands on the ground, and is on all fours."
  },
  "011091": {
    "text": "the person walked forward and then turn right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8089444633462766,
      "Minus Multimodal Distance": -11.51926040649414,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.682059130165726e-05,
      "MoBERT-F": 0.45062124911822365,
      "MoBERT-N": 0.6167437776454714,
      "MoBERT-min(F/N)": 0.45062124911822365,
      "MoBERT-max(F/N)": 0.6167437776454714,
      "MotionCritic": -5.706105709075928,
      "VeMo (human-opt view)": 0.9819540504014628,
      "VeMo (max entropy view)": 0.9819540504014628,
      "VeMo (min entropy view)": 0.9831241886629165,
      "VeMo (random view)": 0.9831241886629165,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walked forward and then turned right."
  },
  "011127": {
    "text": "a person touches toward his pelvis, then extends hand and raises it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5960227701743744,
      "Minus Multimodal Distance": -9.580254554748535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5894678401527926e-05,
      "MoBERT-F": 0.3782012737903804,
      "MoBERT-N": 0.45227703830008775,
      "MoBERT-min(F/N)": 0.3782012737903804,
      "MoBERT-max(F/N)": 0.45227703830008775,
      "MotionCritic": -10.702905654907227,
      "VeMo (human-opt view)": 0.9929020482660718,
      "VeMo (max entropy view)": 0.9755455642066017,
      "VeMo (min entropy view)": 0.9929020482660718,
      "VeMo (random view)": 0.9755455642066017,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person touches his pelvis, then extends his hand and raises it."
  },
  "011136": {
    "text": "a person who is jumping around with one leg but alternating the legs every jump",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8945443542028044,
      "Minus Multimodal Distance": -2.969869613647461,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.1154334619641304,
      "MoBERT-F": 0.759473524604575,
      "MoBERT-N": 0.8038159023363282,
      "MoBERT-min(F/N)": 0.759473524604575,
      "MoBERT-max(F/N)": 0.8038159023363282,
      "MotionCritic": -12.154668807983398,
      "VeMo (human-opt view)": 0.28192161820480405,
      "VeMo (max entropy view)": 0.28192161820480405,
      "VeMo (min entropy view)": 0.06394557823129252,
      "VeMo (random view)": 0.28192161820480405,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who is jumping around on one leg alternates legs every jump."
  },
  "011146": {
    "text": "the figure rises from a laying position and walks in a clockwise circle, and then lays back down the ground.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.057310888980543,
      "Minus Multimodal Distance": -2.338771343231201,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.01386968046426773,
      "MoBERT-F": 0.5211918887039904,
      "MoBERT-N": 0.41140366270340123,
      "MoBERT-min(F/N)": 0.41140366270340123,
      "MoBERT-max(F/N)": 0.5211918887039904,
      "MotionCritic": -13.913063049316406,
      "VeMo (human-opt view)": 0.6088082901554405,
      "VeMo (max entropy view)": 0.6088082901554405,
      "VeMo (min entropy view)": 0.6088888888888889,
      "VeMo (random view)": 0.6088888888888889,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure rises from a lying position and walks in a clockwise circle, then lies back down on the ground."
  },
  "011158": {
    "text": "a man jogs forward a few steps, walks back to his spot and squats down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2886757444369004,
      "Minus Multimodal Distance": -9.916728973388672,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.8022200897103176e-05,
      "MoBERT-F": 0.49785554541730037,
      "MoBERT-N": 0.6239900981647424,
      "MoBERT-min(F/N)": 0.49785554541730037,
      "MoBERT-max(F/N)": 0.6239900981647424,
      "MotionCritic": -17.857053756713867,
      "VeMo (human-opt view)": 0.8670181605155243,
      "VeMo (max entropy view)": 0.8670181605155243,
      "VeMo (min entropy view)": 0.9363077917879081,
      "VeMo (random view)": 0.9363077917879081,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man jogs forward a few steps, walks back to his spot, and squats down."
  },
  "011162": {
    "text": "the person is painting a wall.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.084897890798734,
      "Minus Multimodal Distance": -11.138684272766113,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.15833798050880432,
      "MoBERT-F": 0.7335430444633048,
      "MoBERT-N": 0.6924789843011104,
      "MoBERT-min(F/N)": 0.6924789843011104,
      "MoBERT-max(F/N)": 0.7335430444633048,
      "MotionCritic": -8.462916374206543,
      "VeMo (human-opt view)": 2.268735858171323e-06,
      "VeMo (max entropy view)": 1.1829707756499352e-05,
      "VeMo (min entropy view)": 2.268735858171323e-06,
      "VeMo (random view)": 1.1829707756499352e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is painting a wall."
  },
  "011184": {
    "text": "the person is running forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0761241727518047,
      "Minus Multimodal Distance": -6.793235778808594,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2994006940280087e-05,
      "MoBERT-F": 0.4687524597557256,
      "MoBERT-N": 0.6250106107407533,
      "MoBERT-min(F/N)": 0.4687524597557256,
      "MoBERT-max(F/N)": 0.6250106107407533,
      "MotionCritic": -10.958674430847168,
      "VeMo (human-opt view)": 0.9364928909952607,
      "VeMo (max entropy view)": 0.8745519713261649,
      "VeMo (min entropy view)": 0.9364928909952607,
      "VeMo (random view)": 0.9364928909952607,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is running forward."
  },
  "011189": {
    "text": "a person walking down and taking a left turn",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2522129941998685,
      "Minus Multimodal Distance": -8.288010597229004,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2448706658906303e-05,
      "MoBERT-F": 0.3711969123835363,
      "MoBERT-N": 0.5324499754185679,
      "MoBERT-min(F/N)": 0.3711969123835363,
      "MoBERT-max(F/N)": 0.5324499754185679,
      "MotionCritic": -9.332164764404297,
      "VeMo (human-opt view)": 0.8670391061452514,
      "VeMo (max entropy view)": 0.8670391061452514,
      "VeMo (min entropy view)": 0.9363402797602055,
      "VeMo (random view)": 0.8670391061452514,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking down and takes a left turn."
  },
  "011210": {
    "text": "person is walking wobbling forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9928328402550788,
      "Minus Multimodal Distance": -8.409635543823242,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.22719058394432068,
      "MoBERT-F": 0.7073874786160395,
      "MoBERT-N": 0.6676829356788894,
      "MoBERT-min(F/N)": 0.6676829356788894,
      "MoBERT-max(F/N)": 0.7073874786160395,
      "MotionCritic": -9.208444595336914,
      "VeMo (human-opt view)": 0.7660910518053375,
      "VeMo (max entropy view)": 0.6370967741935484,
      "VeMo (min entropy view)": 0.7660910518053375,
      "VeMo (random view)": 0.6370967741935484,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking wobblingly forward."
  },
  "011211": {
    "text": "the man is using his arms and legs to propel him self forward in a running fashion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6335253303996264,
      "Minus Multimodal Distance": -10.191948890686035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6565068765194155e-05,
      "MoBERT-F": 0.4956919379982827,
      "MoBERT-N": 0.5106286123386006,
      "MoBERT-min(F/N)": 0.4956919379982827,
      "MoBERT-max(F/N)": 0.5106286123386006,
      "MotionCritic": -12.70718002319336,
      "VeMo (human-opt view)": 0.8522829006266786,
      "VeMo (max entropy view)": 0.7874015748031497,
      "VeMo (min entropy view)": 0.8522829006266786,
      "VeMo (random view)": 0.8522829006266786,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man is using his arms and legs to propel himself forward in a running fashion."
  },
  "011213": {
    "text": "he stands and wobbles slightly, as if trying to stay steady in the wind",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8929337427883681,
      "Minus Multimodal Distance": -4.64118766784668,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00040034292032942176,
      "MoBERT-F": 0.49670874844329704,
      "MoBERT-N": 0.5235926774834716,
      "MoBERT-min(F/N)": 0.49670874844329704,
      "MoBERT-max(F/N)": 0.5235926774834716,
      "MotionCritic": -13.556462287902832,
      "VeMo (human-opt view)": 0.6224677716390423,
      "VeMo (max entropy view)": 0.5317647058823529,
      "VeMo (min entropy view)": 0.6224677716390423,
      "VeMo (random view)": 0.6224677716390423,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He stands and wobbles slightly, as if trying to stay steady in the wind."
  },
  "011223": {
    "text": "this person waves his right arm up and down as if to enjoy a beat.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.915075921255773,
      "Minus Multimodal Distance": -11.22375774383545,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.137446037726477e-05,
      "MoBERT-F": 0.28272571466527724,
      "MoBERT-N": 0.45010024790394826,
      "MoBERT-min(F/N)": 0.28272571466527724,
      "MoBERT-max(F/N)": 0.45010024790394826,
      "MotionCritic": -5.5023956298828125,
      "VeMo (human-opt view)": 0.4380952380952381,
      "VeMo (max entropy view)": 0.5466237942122186,
      "VeMo (min entropy view)": 0.4380952380952381,
      "VeMo (random view)": 0.5466237942122186,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person waves his right arm up and down as if to enjoy a beat."
  },
  "011224": {
    "text": "running forward and stopping.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.627776897044661,
      "Minus Multimodal Distance": -9.125484466552734,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.4923148127272725e-05,
      "MoBERT-F": 0.550533199596672,
      "MoBERT-N": 0.6463338151492073,
      "MoBERT-min(F/N)": 0.550533199596672,
      "MoBERT-max(F/N)": 0.6463338151492073,
      "MotionCritic": -11.346681594848633,
      "VeMo (human-opt view)": 0.9959319088431613,
      "VeMo (max entropy view)": 0.9819853397937632,
      "VeMo (min entropy view)": 0.9959319088431613,
      "VeMo (random view)": 0.9819853397937632,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs forward and stops."
  },
  "011226": {
    "text": "the person uses the left to grab the right elbow and swing it. the right arm raises up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4641157345215214,
      "Minus Multimodal Distance": -7.973700046539307,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3507513105869293e-05,
      "MoBERT-F": 0.4781293169657779,
      "MoBERT-N": 0.5686268928374892,
      "MoBERT-min(F/N)": 0.4781293169657779,
      "MoBERT-max(F/N)": 0.5686268928374892,
      "MotionCritic": -9.332639694213867,
      "VeMo (human-opt view)": 0.08982035928143713,
      "VeMo (max entropy view)": 0.3479212253829322,
      "VeMo (min entropy view)": 0.08982035928143713,
      "VeMo (random view)": 0.08982035928143713,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person uses the left hand to grab the right elbow and swing it. The right arm raises up."
  },
  "011252": {
    "text": "the person is walking up the stairs and stood still.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3794024121589497,
      "Minus Multimodal Distance": -13.705161094665527,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4366237994399853e-05,
      "MoBERT-F": 0.48546243388508403,
      "MoBERT-N": 0.602192383129925,
      "MoBERT-min(F/N)": 0.48546243388508403,
      "MoBERT-max(F/N)": 0.602192383129925,
      "MotionCritic": -7.431452751159668,
      "VeMo (human-opt view)": 0.016909771524461083,
      "VeMo (max entropy view)": 0.016909771524461083,
      "VeMo (min entropy view)": 0.006690904646970762,
      "VeMo (random view)": 0.016909771524461083,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was walking up the stairs and then stood still."
  },
  "011255": {
    "text": "a man lifts his right arm up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9193663816296774,
      "Minus Multimodal Distance": -7.266057014465332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8863172701676376e-05,
      "MoBERT-F": 0.32283288850564984,
      "MoBERT-N": 0.4863041671291427,
      "MoBERT-min(F/N)": 0.32283288850564984,
      "MoBERT-max(F/N)": 0.4863041671291427,
      "MotionCritic": -9.795526504516602,
      "VeMo (human-opt view)": 0.9883306547411083,
      "VeMo (max entropy view)": 0.7058823529411765,
      "VeMo (min entropy view)": 0.9883306547411083,
      "VeMo (random view)": 0.7058823529411765,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man lifts his right arm up."
  },
  "011257": {
    "text": "a person throws something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1962609790960272,
      "Minus Multimodal Distance": -7.6545090675354,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.01099411677569151,
      "MoBERT-F": 0.5862046052676815,
      "MoBERT-N": 0.4166172766548547,
      "MoBERT-min(F/N)": 0.4166172766548547,
      "MoBERT-max(F/N)": 0.5862046052676815,
      "MotionCritic": -12.550447463989258,
      "VeMo (human-opt view)": 0.6654867256637168,
      "VeMo (max entropy view)": 0.6654867256637168,
      "VeMo (min entropy view)": 0.7431506849315068,
      "VeMo (random view)": 0.7431506849315068,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws something."
  },
  "011284": {
    "text": "a person picks something up with their right hand and walks forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8265687079208177,
      "Minus Multimodal Distance": -6.252774715423584,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.06899624317884445,
      "MoBERT-F": 0.603459337487485,
      "MoBERT-N": 0.44784850726950454,
      "MoBERT-min(F/N)": 0.44784850726950454,
      "MoBERT-max(F/N)": 0.603459337487485,
      "MotionCritic": -7.571935653686523,
      "VeMo (human-opt view)": 0.5783582089552238,
      "VeMo (max entropy view)": 0.5783582089552238,
      "VeMo (min entropy view)": 0.08033240997229917,
      "VeMo (random view)": 0.5783582089552238,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks something up with their right hand and walks forward."
  },
  "011295": {
    "text": "a person is walking forward strangely",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2543865435011372,
      "Minus Multimodal Distance": -7.768608093261719,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.28996726870536804,
      "MoBERT-F": 0.7785832755272879,
      "MoBERT-N": 0.6320114071317015,
      "MoBERT-min(F/N)": 0.6320114071317015,
      "MoBERT-max(F/N)": 0.7785832755272879,
      "MotionCritic": -10.388960838317871,
      "VeMo (human-opt view)": 0.9819879919946631,
      "VeMo (max entropy view)": 0.9362653745806933,
      "VeMo (min entropy view)": 0.9819879919946631,
      "VeMo (random view)": 0.9819879919946631,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking forward strangely."
  },
  "011307": {
    "text": "person walks forward, stumbles back, and continues forward",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.075864600887946,
      "Minus Multimodal Distance": -12.143720626831055,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9947762489318848,
      "MoBERT-F": 0.7140382858289365,
      "MoBERT-N": 0.8050107391627301,
      "MoBERT-min(F/N)": 0.7140382858289365,
      "MoBERT-max(F/N)": 0.8050107391627301,
      "MotionCritic": -6.627304553985596,
      "VeMo (human-opt view)": 0.9243633599391866,
      "VeMo (max entropy view)": 0.8934269304403318,
      "VeMo (min entropy view)": 0.9243633599391866,
      "VeMo (random view)": 0.8934269304403318,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, stumbles back, and continues forward."
  },
  "011332": {
    "text": "walking diagonally to the left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6768072866331976,
      "Minus Multimodal Distance": -3.7294228076934814,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.007913102395832539,
      "MoBERT-F": 0.7176367246023552,
      "MoBERT-N": 0.791468616158132,
      "MoBERT-min(F/N)": 0.7176367246023552,
      "MoBERT-max(F/N)": 0.791468616158132,
      "MotionCritic": -7.58966588973999,
      "VeMo (human-opt view)": 0.8437233134073442,
      "VeMo (max entropy view)": 0.8350877192982457,
      "VeMo (min entropy view)": 0.8437233134073442,
      "VeMo (random view)": 0.8350877192982457,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking diagonally to the left."
  },
  "011339": {
    "text": "a person who is standing with his hands at his sides quickly runs forward and stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1939758433148389,
      "Minus Multimodal Distance": -7.418331146240234,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.969924025819637e-05,
      "MoBERT-F": 0.4042978629118472,
      "MoBERT-N": 0.5128216340138356,
      "MoBERT-min(F/N)": 0.4042978629118472,
      "MoBERT-max(F/N)": 0.5128216340138356,
      "MotionCritic": -7.317920207977295,
      "VeMo (human-opt view)": 0.986740672217083,
      "VeMo (max entropy view)": 0.9626955475330926,
      "VeMo (min entropy view)": 0.986740672217083,
      "VeMo (random view)": 0.986740672217083,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who is standing with their hands at their sides quickly runs forward and then stops."
  },
  "011340": {
    "text": "the person was pushed but stayed standing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7094284335088046,
      "Minus Multimodal Distance": -9.284469604492188,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3022421373752877e-05,
      "MoBERT-F": 0.3861659177648254,
      "MoBERT-N": 0.5018978356178876,
      "MoBERT-min(F/N)": 0.3861659177648254,
      "MoBERT-max(F/N)": 0.5018978356178876,
      "MotionCritic": -10.193790435791016,
      "VeMo (human-opt view)": 0.9197604790419162,
      "VeMo (max entropy view)": 0.7547169811320755,
      "VeMo (min entropy view)": 0.9197604790419162,
      "VeMo (random view)": 0.9197604790419162,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was pushed but stayed standing."
  },
  "011351": {
    "text": "walks forward, turns around, and walks back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.462336591430566,
      "Minus Multimodal Distance": -8.284366607666016,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5702528953552246,
      "MoBERT-F": 0.6992390901241101,
      "MoBERT-N": 0.7132144770867926,
      "MoBERT-min(F/N)": 0.6992390901241101,
      "MoBERT-max(F/N)": 0.7132144770867926,
      "MotionCritic": -15.102202415466309,
      "VeMo (human-opt view)": 0.9919369288657947,
      "VeMo (max entropy view)": 0.97556434721899,
      "VeMo (min entropy view)": 0.9919369288657947,
      "VeMo (random view)": 0.9919369288657947,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, turns around, and walks back."
  },
  "011363": {
    "text": "the man is running and scrambling",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.0490572962102784,
      "Minus Multimodal Distance": -9.598466873168945,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.1386436350876465e-05,
      "MoBERT-F": 0.3804456323363324,
      "MoBERT-N": 0.5608064664334309,
      "MoBERT-min(F/N)": 0.3804456323363324,
      "MoBERT-max(F/N)": 0.5608064664334309,
      "MotionCritic": -15.491509437561035,
      "VeMo (human-opt view)": 0.8174672489082969,
      "VeMo (max entropy view)": 0.8174204355108877,
      "VeMo (min entropy view)": 0.8174672489082969,
      "VeMo (random view)": 0.8174672489082969,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is running and scrambling."
  },
  "011372": {
    "text": "a man waves and then beckons with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6299831617457151,
      "Minus Multimodal Distance": -3.9397940635681152,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.088196535827592e-05,
      "MoBERT-F": 0.3657046302299972,
      "MoBERT-N": 0.38807530394983153,
      "MoBERT-min(F/N)": 0.3657046302299972,
      "MoBERT-max(F/N)": 0.38807530394983153,
      "MotionCritic": -2.279033660888672,
      "VeMo (human-opt view)": 0.7317676143386898,
      "VeMo (max entropy view)": 0.6224188790560472,
      "VeMo (min entropy view)": 0.7317676143386898,
      "VeMo (random view)": 0.7317676143386898,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man waves and then beckons with his right hand."
  },
  "011385": {
    "text": "the person is picking something up and putting it on something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7519368586976611,
      "Minus Multimodal Distance": -7.295019626617432,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.271704089362174e-05,
      "MoBERT-F": 0.5802228222569965,
      "MoBERT-N": 0.6013142369174709,
      "MoBERT-min(F/N)": 0.5802228222569965,
      "MoBERT-max(F/N)": 0.6013142369174709,
      "MotionCritic": -7.709268093109131,
      "VeMo (human-opt view)": 4.227735783618846e-06,
      "VeMo (max entropy view)": 4.643957713447906e-06,
      "VeMo (min entropy view)": 4.227735783618846e-06,
      "VeMo (random view)": 4.227735783618846e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is picking something up and putting it on something."
  },
  "011412": {
    "text": "a  person doing a limping walk, taking proper step with the right foot and limping with the left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8495047458266416,
      "Minus Multimodal Distance": -2.4092063903808594,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9892162084579468,
      "MoBERT-F": 0.7882786317824071,
      "MoBERT-N": 0.6963857675627323,
      "MoBERT-min(F/N)": 0.6963857675627323,
      "MoBERT-max(F/N)": 0.7882786317824071,
      "MotionCritic": -13.676490783691406,
      "VeMo (human-opt view)": 0.23268206039076378,
      "VeMo (max entropy view)": 0.2448700410396717,
      "VeMo (min entropy view)": 0.23268206039076378,
      "VeMo (random view)": 0.2448700410396717,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person doing a limping walk, taking proper step with the right foot and limping with the left."
  },
  "011419": {
    "text": "a figure winds up for the pitch.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2644782429960417,
      "Minus Multimodal Distance": -10.561279296875,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.41297999024391174,
      "MoBERT-F": 0.6793831367029566,
      "MoBERT-N": 0.6204550310558314,
      "MoBERT-min(F/N)": 0.6204550310558314,
      "MoBERT-max(F/N)": 0.6793831367029566,
      "MotionCritic": -8.222200393676758,
      "VeMo (human-opt view)": 0.5617529880478087,
      "VeMo (max entropy view)": 0.5617529880478087,
      "VeMo (min entropy view)": 0.3933463796477495,
      "VeMo (random view)": 0.3933463796477495,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure winds up for the pitch."
  },
  "011435": {
    "text": "a person has his right hand in the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7110399045600153,
      "Minus Multimodal Distance": -6.779266357421875,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.408808722975664e-05,
      "MoBERT-F": 0.3736662300908001,
      "MoBERT-N": 0.5277118210720491,
      "MoBERT-min(F/N)": 0.3736662300908001,
      "MoBERT-max(F/N)": 0.5277118210720491,
      "MotionCritic": -5.047553062438965,
      "VeMo (human-opt view)": 0.9151157512482978,
      "VeMo (max entropy view)": 0.9151157512482978,
      "VeMo (min entropy view)": 0.9436298468556533,
      "VeMo (random view)": 0.9436298468556533,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person has his right hand in the air."
  },
  "011438": {
    "text": "the man is dancing around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2805527399442203,
      "Minus Multimodal Distance": -3.4123897552490234,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.06548107415437698,
      "MoBERT-F": 0.6909202105708228,
      "MoBERT-N": 0.6214179245868818,
      "MoBERT-min(F/N)": 0.6214179245868818,
      "MoBERT-max(F/N)": 0.6909202105708228,
      "MotionCritic": -8.387334823608398,
      "VeMo (human-opt view)": 0.887055183084064,
      "VeMo (max entropy view)": 0.887055183084064,
      "VeMo (min entropy view)": 0.9525048796356539,
      "VeMo (random view)": 0.887055183084064,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is dancing around."
  },
  "011458": {
    "text": "a man repeatedly uses his right arm to reach for something slightly in front of him, back again, then looks to be stirring something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8329719138095286,
      "Minus Multimodal Distance": -11.796196937561035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.21714069286827e-05,
      "MoBERT-F": 0.4076180208041892,
      "MoBERT-N": 0.4665266334804275,
      "MoBERT-min(F/N)": 0.4076180208041892,
      "MoBERT-max(F/N)": 0.4665266334804275,
      "MotionCritic": -3.1161961555480957,
      "VeMo (human-opt view)": 0.7437185929648241,
      "VeMo (max entropy view)": 0.7061790668348046,
      "VeMo (min entropy view)": 0.7437185929648241,
      "VeMo (random view)": 0.7061790668348046,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man repeatedly reaches forward with his right arm for something slightly in front of himself, pulls his arm back again, and then looks to be stirring something."
  },
  "011471": {
    "text": "a person sits on the ledge of something then gets off and walks away.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5271222882544555,
      "Minus Multimodal Distance": -3.4634010791778564,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4136399588314816e-05,
      "MoBERT-F": 0.4465722938235477,
      "MoBERT-N": 0.4857804931271339,
      "MoBERT-min(F/N)": 0.4465722938235477,
      "MoBERT-max(F/N)": 0.4857804931271339,
      "MotionCritic": -13.015951156616211,
      "VeMo (human-opt view)": 1.2221010403470654e-05,
      "VeMo (max entropy view)": 4.281740411562781e-05,
      "VeMo (min entropy view)": 1.2221010403470654e-05,
      "VeMo (random view)": 4.281740411562781e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits on the ledge of something, then gets off and walks away."
  },
  "011491": {
    "text": "a person is running rapidly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.1119892730274183,
      "Minus Multimodal Distance": -13.024345397949219,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.425887502497062e-05,
      "MoBERT-F": 0.5095408688160395,
      "MoBERT-N": 0.5014414062323839,
      "MoBERT-min(F/N)": 0.5014414062323839,
      "MoBERT-max(F/N)": 0.5095408688160395,
      "MotionCritic": -10.14499282836914,
      "VeMo (human-opt view)": 0.6928895612708018,
      "VeMo (max entropy view)": 0.43686006825938567,
      "VeMo (min entropy view)": 0.6928895612708018,
      "VeMo (random view)": 0.43686006825938567,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is running rapidly."
  },
  "011493": {
    "text": "a person swam in free style",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.915356139345494,
      "Minus Multimodal Distance": -12.198747634887695,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9406817702692933e-05,
      "MoBERT-F": 0.43410763797736485,
      "MoBERT-N": 0.48301171344661825,
      "MoBERT-min(F/N)": 0.43410763797736485,
      "MoBERT-max(F/N)": 0.48301171344661825,
      "MotionCritic": -8.920924186706543,
      "VeMo (human-opt view)": 0.001245512491757638,
      "VeMo (max entropy view)": 0.015918958031837915,
      "VeMo (min entropy view)": 0.001245512491757638,
      "VeMo (random view)": 0.001245512491757638,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person swam in freestyle."
  },
  "011504": {
    "text": "a person is stepping around while their hands are behind their back",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9491501639646223,
      "Minus Multimodal Distance": -5.296027183532715,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.03589779511094093,
      "MoBERT-F": 0.6872835847066463,
      "MoBERT-N": 0.6146760468993122,
      "MoBERT-min(F/N)": 0.6146760468993122,
      "MoBERT-max(F/N)": 0.6872835847066463,
      "MotionCritic": -6.694211483001709,
      "VeMo (human-opt view)": 0.3925399644760213,
      "VeMo (max entropy view)": 0.3925399644760213,
      "VeMo (min entropy view)": 0.15609408410548825,
      "VeMo (random view)": 0.3925399644760213,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is stepping around while their hands are behind their back."
  },
  "011555": {
    "text": "the person is doing a casual quick walk.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2366363124366615,
      "Minus Multimodal Distance": -6.954622745513916,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.3408290619263425e-05,
      "MoBERT-F": 0.4549059512586504,
      "MoBERT-N": 0.4649417446093659,
      "MoBERT-min(F/N)": 0.4549059512586504,
      "MoBERT-max(F/N)": 0.4649417446093659,
      "MotionCritic": -14.210339546203613,
      "VeMo (human-opt view)": 0.8353879622915156,
      "VeMo (max entropy view)": 0.7988904299583911,
      "VeMo (min entropy view)": 0.8353879622915156,
      "VeMo (random view)": 0.7988904299583911,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is doing a casual quick walk."
  },
  "011558": {
    "text": "the person was moving his arms up and down doing something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7361125075043006,
      "Minus Multimodal Distance": -8.120721817016602,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005088416510261595,
      "MoBERT-F": 0.5035343379545917,
      "MoBERT-N": 0.5627241060776473,
      "MoBERT-min(F/N)": 0.5035343379545917,
      "MoBERT-max(F/N)": 0.5627241060776473,
      "MotionCritic": -11.050025939941406,
      "VeMo (human-opt view)": 0.6374695863746959,
      "VeMo (max entropy view)": 0.6374695863746959,
      "VeMo (min entropy view)": 0.7658862876254181,
      "VeMo (random view)": 0.6374695863746959,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was moving his arms up and down, doing something."
  },
  "011566": {
    "text": "spinning arms near chest.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7883998190804592,
      "Minus Multimodal Distance": -10.790384292602539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00010343811300117522,
      "MoBERT-F": 0.5689065554239796,
      "MoBERT-N": 0.6202484713539794,
      "MoBERT-min(F/N)": 0.5689065554239796,
      "MoBERT-max(F/N)": 0.6202484713539794,
      "MotionCritic": -4.794338703155518,
      "VeMo (human-opt view)": 0.5470085470085471,
      "VeMo (max entropy view)": 0.5470085470085471,
      "VeMo (min entropy view)": 0.5776173285198556,
      "VeMo (random view)": 0.5470085470085471,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is spinning their arms near the chest."
  },
  "011569": {
    "text": "the person is on the seesaw.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.850269428821477,
      "Minus Multimodal Distance": -10.629775047302246,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5876757717924193e-05,
      "MoBERT-F": 0.5146712476662525,
      "MoBERT-N": 0.5820446871209806,
      "MoBERT-min(F/N)": 0.5146712476662525,
      "MoBERT-max(F/N)": 0.5820446871209806,
      "MotionCritic": -11.66033935546875,
      "VeMo (human-opt view)": 0.00020340913713844027,
      "VeMo (max entropy view)": 0.00020340913713844027,
      "VeMo (min entropy view)": 6.032180619100438e-05,
      "VeMo (random view)": 0.00020340913713844027,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is on the seesaw."
  },
  "011583": {
    "text": "raising and lowering arms.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0159428698496284,
      "Minus Multimodal Distance": -6.912169456481934,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0502171284751967e-05,
      "MoBERT-F": 0.4789913079744825,
      "MoBERT-N": 0.4982320883915542,
      "MoBERT-min(F/N)": 0.4789913079744825,
      "MoBERT-max(F/N)": 0.4982320883915542,
      "MotionCritic": -7.902594566345215,
      "VeMo (human-opt view)": 0.9896569380440349,
      "VeMo (max entropy view)": 0.9725115246713334,
      "VeMo (min entropy view)": 0.9896569380440349,
      "VeMo (random view)": 0.9896569380440349,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is raising and lowering their arms."
  },
  "011589": {
    "text": "laying down and crawling forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.4157585566711717,
      "Minus Multimodal Distance": -4.7375922203063965,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.319864481454715e-05,
      "MoBERT-F": 0.3643005230001712,
      "MoBERT-N": 0.36714409711564877,
      "MoBERT-min(F/N)": 0.3643005230001712,
      "MoBERT-max(F/N)": 0.36714409711564877,
      "MotionCritic": -6.10911226272583,
      "VeMo (human-opt view)": 0.9970179412387765,
      "VeMo (max entropy view)": 0.9951050964583933,
      "VeMo (min entropy view)": 0.9970179412387765,
      "VeMo (random view)": 0.9951050964583933,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is lying down and crawling forward."
  },
  "011594": {
    "text": "a person walks forward while being assisted by hand rails.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2999398158147226,
      "Minus Multimodal Distance": -6.899751663208008,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.835912528098561e-05,
      "MoBERT-F": 0.5271019956424972,
      "MoBERT-N": 0.5531839093350733,
      "MoBERT-min(F/N)": 0.5271019956424972,
      "MoBERT-max(F/N)": 0.5531839093350733,
      "MotionCritic": -6.597081661224365,
      "VeMo (human-opt view)": 0.004597831639893285,
      "VeMo (max entropy view)": 0.004597831639893285,
      "VeMo (min entropy view)": 0.00045865625477766933,
      "VeMo (random view)": 0.004597831639893285,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward while being assisted by handrails."
  },
  "011595": {
    "text": "a person hustles down a short flight of steps before coming to a stop.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1291972870944977,
      "Minus Multimodal Distance": -4.3221917152404785,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0012808998581022024,
      "MoBERT-F": 0.5701212703465908,
      "MoBERT-N": 0.6246912314787602,
      "MoBERT-min(F/N)": 0.5701212703465908,
      "MoBERT-max(F/N)": 0.6246912314787602,
      "MotionCritic": -13.984857559204102,
      "VeMo (human-opt view)": 0.08018867924528301,
      "VeMo (max entropy view)": 0.08018867924528301,
      "VeMo (min entropy view)": 0.02040816326530612,
      "VeMo (random view)": 0.08018867924528301,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person hustles down a short flight of steps before coming to a stop."
  },
  "011629": {
    "text": "a person bends over and picks an object up with both hands and stands up straight.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.459637844803646,
      "Minus Multimodal Distance": -9.45523738861084,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.2160882255993783e-05,
      "MoBERT-F": 0.49545573367531326,
      "MoBERT-N": 0.608622118207151,
      "MoBERT-min(F/N)": 0.49545573367531326,
      "MoBERT-max(F/N)": 0.608622118207151,
      "MotionCritic": -10.771658897399902,
      "VeMo (human-opt view)": 2.912564429211475e-06,
      "VeMo (max entropy view)": 3.0929882163334933e-06,
      "VeMo (min entropy view)": 2.912564429211475e-06,
      "VeMo (random view)": 2.912564429211475e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends over, picks up an object with both hands, and stands up straight."
  },
  "011630": {
    "text": "a person who seems to throw or hit something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5562575601191944,
      "Minus Multimodal Distance": -5.919240474700928,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.12726807594299316,
      "MoBERT-F": 0.6810111924773344,
      "MoBERT-N": 0.5601282171262497,
      "MoBERT-min(F/N)": 0.5601282171262497,
      "MoBERT-max(F/N)": 0.6810111924773344,
      "MotionCritic": -13.149076461791992,
      "VeMo (human-opt view)": 0.8667992047713717,
      "VeMo (max entropy view)": 0.8667992047713717,
      "VeMo (min entropy view)": 0.9196901752955564,
      "VeMo (random view)": 0.9196901752955564,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who seems to throw or hit something."
  },
  "011632": {
    "text": "a person takes one large step forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6971519777717571,
      "Minus Multimodal Distance": -11.891609191894531,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3767872335156426e-05,
      "MoBERT-F": 0.43289976551430814,
      "MoBERT-N": 0.6188500088234237,
      "MoBERT-min(F/N)": 0.43289976551430814,
      "MoBERT-max(F/N)": 0.6188500088234237,
      "MotionCritic": -13.230755805969238,
      "VeMo (human-opt view)": 0.9769820971867008,
      "VeMo (max entropy view)": 0.9579199452617174,
      "VeMo (min entropy view)": 0.9769820971867008,
      "VeMo (random view)": 0.9579199452617174,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes one large step forward."
  },
  "011640": {
    "text": "a person walks in an s shape pattern.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5755889709733801,
      "Minus Multimodal Distance": -1.633239507675171,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6244773835060187e-05,
      "MoBERT-F": 0.5034835847685617,
      "MoBERT-N": 0.557711515350777,
      "MoBERT-min(F/N)": 0.5034835847685617,
      "MoBERT-max(F/N)": 0.557711515350777,
      "MotionCritic": -5.318166255950928,
      "VeMo (human-opt view)": 0.6788617886178862,
      "VeMo (max entropy view)": 0.6788617886178862,
      "VeMo (min entropy view)": 0.7431340872374798,
      "VeMo (random view)": 0.7431340872374798,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in an S - shaped pattern."
  },
  "011645": {
    "text": "a person plays the guitar, strumming with their left hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3708339910511158,
      "Minus Multimodal Distance": -13.875699043273926,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.061754978261888e-05,
      "MoBERT-F": 0.35626692214538247,
      "MoBERT-N": 0.3916213744545848,
      "MoBERT-min(F/N)": 0.35626692214538247,
      "MoBERT-max(F/N)": 0.3916213744545848,
      "MotionCritic": -8.061660766601562,
      "VeMo (human-opt view)": 5.933938312472715e-06,
      "VeMo (max entropy view)": 1.6195469953053215e-05,
      "VeMo (min entropy view)": 5.933938312472715e-06,
      "VeMo (random view)": 5.933938312472715e-06,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is playing the guitar, strumming with their left hand."
  },
  "011652": {
    "text": "a person jumps in the air, then abruptly stumbles to his left as if he had been pushed, and finally he regains his balance.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6639105017441149,
      "Minus Multimodal Distance": -7.28432559967041,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.371726804994978e-05,
      "MoBERT-F": 0.40429267471981956,
      "MoBERT-N": 0.5492252119902592,
      "MoBERT-min(F/N)": 0.40429267471981956,
      "MoBERT-max(F/N)": 0.5492252119902592,
      "MotionCritic": -8.68008804321289,
      "VeMo (human-opt view)": 0.6519480519480519,
      "VeMo (max entropy view)": 0.6519480519480519,
      "VeMo (min entropy view)": 0.33451957295373663,
      "VeMo (random view)": 0.33451957295373663,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps into the air, then abruptly stumbles to his left as if he had been pushed, and finally, he regains his balance."
  },
  "011682": {
    "text": "a man stands prepared and aware, then suddenly takes an abrupt step backwards and regains his ready stance.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5791288569094616,
      "Minus Multimodal Distance": -2.012927532196045,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8315091729164124,
      "MoBERT-F": 0.505387899339651,
      "MoBERT-N": 0.4371908598246732,
      "MoBERT-min(F/N)": 0.4371908598246732,
      "MoBERT-max(F/N)": 0.505387899339651,
      "MotionCritic": -3.4158358573913574,
      "VeMo (human-opt view)": 0.8992460589444825,
      "VeMo (max entropy view)": 0.8992460589444825,
      "VeMo (min entropy view)": 0.9045592705167174,
      "VeMo (random view)": 0.8992460589444825,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man stands prepared and aware. Then, suddenly, he takes an abrupt step backwards and regains his ready stance."
  },
  "011683": {
    "text": "a man lowers his arms and places his hands on his knees.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6204898404024384,
      "Minus Multimodal Distance": -9.588154792785645,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3838878405513242e-05,
      "MoBERT-F": 0.40928068321478905,
      "MoBERT-N": 0.5056266872824823,
      "MoBERT-min(F/N)": 0.40928068321478905,
      "MoBERT-max(F/N)": 0.5056266872824823,
      "MotionCritic": -12.328012466430664,
      "VeMo (human-opt view)": 0.977008652657602,
      "VeMo (max entropy view)": 0.9577854671280277,
      "VeMo (min entropy view)": 0.977008652657602,
      "VeMo (random view)": 0.9577854671280277,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man lowers his arms and places his hands on his knees."
  },
  "011687": {
    "text": "a person walks forward a few steps and then takes a drink with his right hand",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9568981191926244,
      "Minus Multimodal Distance": -7.21328592300415,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4787143047433347e-05,
      "MoBERT-F": 0.37528349197534283,
      "MoBERT-N": 0.45383491010456317,
      "MoBERT-min(F/N)": 0.37528349197534283,
      "MoBERT-max(F/N)": 0.45383491010456317,
      "MotionCritic": -6.739962577819824,
      "VeMo (human-opt view)": 4.402839801307431e-05,
      "VeMo (max entropy view)": 0.009114745910850293,
      "VeMo (min entropy view)": 4.402839801307431e-05,
      "VeMo (random view)": 0.009114745910850293,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward a few steps and then takes a drink with his right hand."
  },
  "011694": {
    "text": "a person jumps while spreading legs apart and swinging arms out and above the head, then back down again.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1301796137945508,
      "Minus Multimodal Distance": -9.702173233032227,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9398592114448547,
      "MoBERT-F": 0.8311845779348901,
      "MoBERT-N": 0.7437874115191933,
      "MoBERT-min(F/N)": 0.7437874115191933,
      "MoBERT-max(F/N)": 0.8311845779348901,
      "MotionCritic": -8.312172889709473,
      "VeMo (human-opt view)": 0.9552969993876301,
      "VeMo (max entropy view)": 0.919407300325262,
      "VeMo (min entropy view)": 0.9552969993876301,
      "VeMo (random view)": 0.919407300325262,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps while spreading their legs apart and swinging their arms out and above their head, then back down again."
  },
  "011716": {
    "text": "a person is climbing up a ladder and cleaning",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3172700951712848,
      "Minus Multimodal Distance": -4.937324047088623,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.444700112391729e-05,
      "MoBERT-F": 0.4917664832629833,
      "MoBERT-N": 0.49936827307137155,
      "MoBERT-min(F/N)": 0.4917664832629833,
      "MoBERT-max(F/N)": 0.49936827307137155,
      "MotionCritic": -12.993982315063477,
      "VeMo (human-opt view)": 4.9211003937177565e-06,
      "VeMo (max entropy view)": 7.402274423747695e-06,
      "VeMo (min entropy view)": 4.9211003937177565e-06,
      "VeMo (random view)": 7.402274423747695e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is climbing up a ladder and cleaning."
  },
  "011717": {
    "text": "a ballerina is bringing the leg forward then taking a bow.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8696555813948733,
      "Minus Multimodal Distance": -9.785844802856445,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.011990019120275974,
      "MoBERT-F": 0.5836277813439502,
      "MoBERT-N": 0.510093625725911,
      "MoBERT-min(F/N)": 0.510093625725911,
      "MoBERT-max(F/N)": 0.5836277813439502,
      "MotionCritic": -4.812392711639404,
      "VeMo (human-opt view)": 0.1330909090909091,
      "VeMo (max entropy view)": 0.1330909090909091,
      "VeMo (min entropy view)": 0.08045977011494253,
      "VeMo (random view)": 0.08045977011494253,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A ballerina is bringing the leg forward and then taking a bow."
  },
  "011731": {
    "text": "a person raised the hands and pull it down",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0453769057003721,
      "Minus Multimodal Distance": -10.418157577514648,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0001700329885352403,
      "MoBERT-F": 0.5092585799013445,
      "MoBERT-N": 0.5373547626761368,
      "MoBERT-min(F/N)": 0.5092585799013445,
      "MoBERT-max(F/N)": 0.5373547626761368,
      "MotionCritic": -7.33169412612915,
      "VeMo (human-opt view)": 0.8987654320987655,
      "VeMo (max entropy view)": 0.8867585380467345,
      "VeMo (min entropy view)": 0.8987654320987655,
      "VeMo (random view)": 0.8987654320987655,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raised the hands and pulled them down."
  },
  "011743": {
    "text": "turning body from side to side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7700032617855198,
      "Minus Multimodal Distance": -8.635614395141602,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.593541623558849e-05,
      "MoBERT-F": 0.4427532339512545,
      "MoBERT-N": 0.5536800032887186,
      "MoBERT-min(F/N)": 0.4427532339512545,
      "MoBERT-max(F/N)": 0.5536800032887186,
      "MotionCritic": -2.6750457286834717,
      "VeMo (human-opt view)": 0.9147788565264293,
      "VeMo (max entropy view)": 0.8668320926385442,
      "VeMo (min entropy view)": 0.9147788565264293,
      "VeMo (random view)": 0.9147788565264293,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is turning their body from side to side."
  },
  "011757": {
    "text": "a person raises both hands and claps their hands multiple times before returning to their original position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3660639586209485,
      "Minus Multimodal Distance": -2.5252609252929688,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.437683542666491e-05,
      "MoBERT-F": 0.3373033567509583,
      "MoBERT-N": 0.4767327023517025,
      "MoBERT-min(F/N)": 0.3373033567509583,
      "MoBERT-max(F/N)": 0.4767327023517025,
      "MotionCritic": -14.030896186828613,
      "VeMo (human-opt view)": 0.9840462427745664,
      "VeMo (max entropy view)": 0.24477611940298508,
      "VeMo (min entropy view)": 0.9840462427745664,
      "VeMo (random view)": 0.9840462427745664,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person raises both hands and claps them multiple times before returning to their original position."
  },
  "011767": {
    "text": "a person is startled from behind.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6786322744963094,
      "Minus Multimodal Distance": -10.137022972106934,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.10566283587832e-05,
      "MoBERT-F": 0.5483660009392934,
      "MoBERT-N": 0.5255423075320109,
      "MoBERT-min(F/N)": 0.5255423075320109,
      "MoBERT-max(F/N)": 0.5483660009392934,
      "MotionCritic": 0.8981018662452698,
      "VeMo (human-opt view)": 0.6796875,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.6796875,
      "VeMo (random view)": 0.6796875,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is startled from behind."
  },
  "011793": {
    "text": "figure seen walking in place lazily.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9301282802119961,
      "Minus Multimodal Distance": -10.234772682189941,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8102931537432596e-05,
      "MoBERT-F": 0.46180846407765086,
      "MoBERT-N": 0.6118430278020741,
      "MoBERT-min(F/N)": 0.46180846407765086,
      "MoBERT-max(F/N)": 0.6118430278020741,
      "MotionCritic": -8.006731986999512,
      "VeMo (human-opt view)": 0.9197461739455021,
      "VeMo (max entropy view)": 0.8176100628930818,
      "VeMo (min entropy view)": 0.9197461739455021,
      "VeMo (random view)": 0.9197461739455021,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Figure seen walking in place lazily."
  },
  "011797": {
    "text": "a person is attempting to jump rope by hopping from one leg to the other as if running in place, but has to reset every two to three jumps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3400902260703187,
      "Minus Multimodal Distance": -5.17793607711792,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9968581199645996,
      "MoBERT-F": 0.8343275890589221,
      "MoBERT-N": 0.807093482599587,
      "MoBERT-min(F/N)": 0.807093482599587,
      "MoBERT-max(F/N)": 0.8343275890589221,
      "MotionCritic": -14.73591423034668,
      "VeMo (human-opt view)": 0.6651270207852193,
      "VeMo (max entropy view)": 0.6651270207852193,
      "VeMo (min entropy view)": 0.6792452830188679,
      "VeMo (random view)": 0.6792452830188679,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is attempting to jump rope by hopping from one leg to the other, as if running in place, but has to reset every two to three jumps."
  },
  "011798": {
    "text": "a person who is standing with his hands by his sides, turns and steps backwards, jogs forward six steps, turns 180 degrees and jogs four steps, then stops and resumes his original position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.0971394854827135,
      "Minus Multimodal Distance": -9.331830024719238,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.338068043172825e-05,
      "MoBERT-F": 0.3852688072967312,
      "MoBERT-N": 0.5232696420361294,
      "MoBERT-min(F/N)": 0.3852688072967312,
      "MoBERT-max(F/N)": 0.5232696420361294,
      "MotionCritic": -6.414645671844482,
      "VeMo (human-opt view)": 0.8869690424766019,
      "VeMo (max entropy view)": 0.8869690424766019,
      "VeMo (min entropy view)": 0.9148325358851674,
      "VeMo (random view)": 0.9148325358851674,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who is standing with his hands by his sides turns and steps backwards, jogs forward six steps, turns 180 degrees and jogs four steps, then stops and returns to his original position."
  },
  "011805": {
    "text": "the  person was walking very fast.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0464811097647042,
      "Minus Multimodal Distance": -1.1792681217193604,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7418674230575562,
      "MoBERT-F": 0.8458420896260864,
      "MoBERT-N": 0.8008343051043627,
      "MoBERT-min(F/N)": 0.8008343051043627,
      "MoBERT-max(F/N)": 0.8458420896260864,
      "MotionCritic": -8.266119003295898,
      "VeMo (human-opt view)": 0.044534412955465584,
      "VeMo (max entropy view)": 0.044534412955465584,
      "VeMo (min entropy view)": 0.009142996464708033,
      "VeMo (random view)": 0.009142996464708033,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was walking very fast."
  },
  "011809": {
    "text": "a person is moving around the room while moving his hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.0691659563965223,
      "Minus Multimodal Distance": -7.112249851226807,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4493021555827e-05,
      "MoBERT-F": 0.4620349973558049,
      "MoBERT-N": 0.4746311298231058,
      "MoBERT-min(F/N)": 0.4620349973558049,
      "MoBERT-max(F/N)": 0.4746311298231058,
      "MotionCritic": -4.859821319580078,
      "VeMo (human-opt view)": 0.7435456110154905,
      "VeMo (max entropy view)": 0.7435456110154905,
      "VeMo (min entropy view)": 0.8597748208802457,
      "VeMo (random view)": 0.7435456110154905,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is moving around the room while moving his hands."
  },
  "011825": {
    "text": "a person jogs diagonally to his right and then slows down to a walk.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4023231673201104,
      "Minus Multimodal Distance": -10.790467262268066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.9435562434373423e-05,
      "MoBERT-F": 0.4251378047392372,
      "MoBERT-N": 0.6471298569182793,
      "MoBERT-min(F/N)": 0.4251378047392372,
      "MoBERT-max(F/N)": 0.6471298569182793,
      "MotionCritic": -4.459040641784668,
      "VeMo (human-opt view)": 0.9362999085644621,
      "VeMo (max entropy view)": 0.9362488450877733,
      "VeMo (min entropy view)": 0.9362999085644621,
      "VeMo (random view)": 0.9362488450877733,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person jogs diagonally to their right and then slows down to a walk."
  },
  "011827": {
    "text": "a person bends down, picks something up, sticks it in the ground and rotates it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.413803000872466,
      "Minus Multimodal Distance": -10.18651294708252,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00011481178808026016,
      "MoBERT-F": 0.48028094830570844,
      "MoBERT-N": 0.5409439821735531,
      "MoBERT-min(F/N)": 0.48028094830570844,
      "MoBERT-max(F/N)": 0.5409439821735531,
      "MotionCritic": -4.548900127410889,
      "VeMo (human-opt view)": 0.02927450148493848,
      "VeMo (max entropy view)": 0.45320197044334976,
      "VeMo (min entropy view)": 0.02927450148493848,
      "VeMo (random view)": 0.45320197044334976,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends down, picks something up, sticks it in the ground, and rotates it."
  },
  "011863": {
    "text": "a man stands with his legs parted and slowly rotates his upper torso a few times, then starts to rotate his hips.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7667622063578745,
      "Minus Multimodal Distance": -10.41075325012207,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9884393370593898e-05,
      "MoBERT-F": 0.4044352521248229,
      "MoBERT-N": 0.4818299753826477,
      "MoBERT-min(F/N)": 0.4044352521248229,
      "MoBERT-max(F/N)": 0.4818299753826477,
      "MotionCritic": -17.989089965820312,
      "VeMo (human-opt view)": 0.8667496886674969,
      "VeMo (max entropy view)": 0.7872127872127872,
      "VeMo (min entropy view)": 0.8667496886674969,
      "VeMo (random view)": 0.8667496886674969,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man stands with his legs parted and slowly rotates his upper torso a few times. Then, he starts to rotate his hips."
  },
  "011864": {
    "text": "a person, standing in the middle of the screen, raises their arms and waves twice before lowering their arms again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7212866079098598,
      "Minus Multimodal Distance": -11.999268531799316,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.05026344209909439,
      "MoBERT-F": 0.43783480409622055,
      "MoBERT-N": 0.42505672695242736,
      "MoBERT-min(F/N)": 0.42505672695242736,
      "MoBERT-max(F/N)": 0.43783480409622055,
      "MotionCritic": -7.975674152374268,
      "VeMo (human-opt view)": 0.9604826546003017,
      "VeMo (max entropy view)": 0.9552238805970149,
      "VeMo (min entropy view)": 0.9604826546003017,
      "VeMo (random view)": 0.9552238805970149,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person, standing in the middle of the screen, raises their arms and waves twice before lowering their arms again."
  },
  "011897": {
    "text": "a person kneeling on the ground gets up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0021650435846694,
      "Minus Multimodal Distance": -7.344284534454346,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4345472411368974e-05,
      "MoBERT-F": 0.385475560376098,
      "MoBERT-N": 0.5479092585506067,
      "MoBERT-min(F/N)": 0.385475560376098,
      "MoBERT-max(F/N)": 0.5479092585506067,
      "MotionCritic": -6.281535625457764,
      "VeMo (human-opt view)": 0.9995949945480035,
      "VeMo (max entropy view)": 0.9990908486152072,
      "VeMo (min entropy view)": 0.9995949945480035,
      "VeMo (random view)": 0.9995949945480035,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person kneeling on the ground gets up."
  },
  "011904": {
    "text": "a person sits down at a chair, then moves the chair around while they are still in it.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5892882040125627,
      "Minus Multimodal Distance": -9.519160270690918,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.361286897212267e-05,
      "MoBERT-F": 0.3976405418165693,
      "MoBERT-N": 0.41292999776239553,
      "MoBERT-min(F/N)": 0.3976405418165693,
      "MoBERT-max(F/N)": 0.41292999776239553,
      "MotionCritic": -6.959976673126221,
      "VeMo (human-opt view)": 2.7641969709266185e-05,
      "VeMo (max entropy view)": 3.7652111493975054e-05,
      "VeMo (min entropy view)": 2.7641969709266185e-05,
      "VeMo (random view)": 3.7652111493975054e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits down on a chair, then moves the chair around while they are still in it."
  },
  "011935": {
    "text": "a person walks down a hill and places a box on the ground.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8029731833312428,
      "Minus Multimodal Distance": -6.237631797790527,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.04735457897186279,
      "MoBERT-F": 0.6492843476243231,
      "MoBERT-N": 0.6013983395011308,
      "MoBERT-min(F/N)": 0.6013983395011308,
      "MoBERT-max(F/N)": 0.6492843476243231,
      "MotionCritic": -7.042967319488525,
      "VeMo (human-opt view)": 1.0468129425113397e-05,
      "VeMo (max entropy view)": 1.0468129425113397e-05,
      "VeMo (min entropy view)": 8.398955892665626e-06,
      "VeMo (random view)": 1.0468129425113397e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks down a hill and places a box on the ground."
  },
  "011965": {
    "text": "a person abruptly stumbles forward and regains his balance as if he had been pushed from behind.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5333011510887806,
      "Minus Multimodal Distance": -8.846135139465332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.3259835618082434e-05,
      "MoBERT-F": 0.3606354342580733,
      "MoBERT-N": 0.4771100825292524,
      "MoBERT-min(F/N)": 0.3606354342580733,
      "MoBERT-max(F/N)": 0.4771100825292524,
      "MotionCritic": 0.8981018662452698,
      "VeMo (human-opt view)": 0.8082901554404145,
      "VeMo (max entropy view)": 0.6083150984682714,
      "VeMo (min entropy view)": 0.8082901554404145,
      "VeMo (random view)": 0.6083150984682714,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person abruptly stumbles forward and regains their balance as if they had been pushed from behind."
  },
  "011972": {
    "text": "a figure steps backward cockily, swinging their arms",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.5476100699364985,
      "Minus Multimodal Distance": -4.478930473327637,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0001320485316682607,
      "MoBERT-F": 0.44131920117410084,
      "MoBERT-N": 0.4168923349388346,
      "MoBERT-min(F/N)": 0.4168923349388346,
      "MoBERT-max(F/N)": 0.44131920117410084,
      "MotionCritic": -6.10911226272583,
      "VeMo (human-opt view)": 7.891125210503245e-06,
      "VeMo (max entropy view)": 5.152702592509771e-05,
      "VeMo (min entropy view)": 7.891125210503245e-06,
      "VeMo (random view)": 5.152702592509771e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure steps backward cockily, swinging their arms."
  },
  "011988": {
    "text": "this person bends forward as if to bow.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6094374893940822,
      "Minus Multimodal Distance": -10.525630950927734,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.11885318160057068,
      "MoBERT-F": 0.599441225212021,
      "MoBERT-N": 0.6502315736546754,
      "MoBERT-min(F/N)": 0.599441225212021,
      "MoBERT-max(F/N)": 0.6502315736546754,
      "MotionCritic": -3.4534428119659424,
      "VeMo (human-opt view)": 0.9981930575368522,
      "VeMo (max entropy view)": 0.9944397897783532,
      "VeMo (min entropy view)": 0.9981930575368522,
      "VeMo (random view)": 0.9981930575368522,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person bends forward as if to bow."
  },
  "011991": {
    "text": "a person appears to be pushing against a wall.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5186548754034426,
      "Minus Multimodal Distance": -9.031538009643555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.290535333799198e-05,
      "MoBERT-F": 0.6262210958488414,
      "MoBERT-N": 0.6027199437775486,
      "MoBERT-min(F/N)": 0.6027199437775486,
      "MoBERT-max(F/N)": 0.6262210958488414,
      "MotionCritic": -1.2477694749832153,
      "VeMo (human-opt view)": 0.11239193083573487,
      "VeMo (max entropy view)": 0.40794223826714804,
      "VeMo (min entropy view)": 0.11239193083573487,
      "VeMo (random view)": 0.11239193083573487,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person appears to be pushing against a wall."
  },
  "011993": {
    "text": "a person appears to be playing the violin.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7224400656078759,
      "Minus Multimodal Distance": -13.186386108398438,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.799714977503754e-05,
      "MoBERT-F": 0.5185231428008796,
      "MoBERT-N": 0.5110213019336081,
      "MoBERT-min(F/N)": 0.5110213019336081,
      "MoBERT-max(F/N)": 0.5185231428008796,
      "MotionCritic": -4.0245890617370605,
      "VeMo (human-opt view)": 1.2247035966332295e-05,
      "VeMo (max entropy view)": 1.387881232104144e-05,
      "VeMo (min entropy view)": 1.2247035966332295e-05,
      "VeMo (random view)": 1.2247035966332295e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person appears to be playing the violin."
  },
  "012001": {
    "text": "a man picks up an object moves it a few inches to the right then places it back down",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3661803938085477,
      "Minus Multimodal Distance": -11.156428337097168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0290850190795027e-05,
      "MoBERT-F": 0.43491338925658685,
      "MoBERT-N": 0.5387521074132965,
      "MoBERT-min(F/N)": 0.43491338925658685,
      "MoBERT-max(F/N)": 0.5387521074132965,
      "MotionCritic": -0.42969492077827454,
      "VeMo (human-opt view)": 0.10674157303370786,
      "VeMo (max entropy view)": 0.16439600363306087,
      "VeMo (min entropy view)": 0.10674157303370786,
      "VeMo (random view)": 0.10674157303370786,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man picks up an object, moves it a few inches to the right, then places it back down."
  },
  "012005": {
    "text": "a person bends down and crawls to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4378163969185977,
      "Minus Multimodal Distance": -7.812775611877441,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2325990357785486e-05,
      "MoBERT-F": 0.43266245497098527,
      "MoBERT-N": 0.5042801103731739,
      "MoBERT-min(F/N)": 0.43266245497098527,
      "MoBERT-max(F/N)": 0.5042801103731739,
      "MotionCritic": -6.417893886566162,
      "VeMo (human-opt view)": 0.9496619083395943,
      "VeMo (max entropy view)": 0.9496619083395943,
      "VeMo (min entropy view)": 0.9808192771084338,
      "VeMo (random view)": 0.9808192771084338,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person bends down and crawls to the left."
  },
  "012020": {
    "text": "a person, standing still with their arms at their sides, seems to wince/twitch, then continues standing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4186431960205457,
      "Minus Multimodal Distance": -14.056489944458008,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.8940765080042183e-05,
      "MoBERT-F": 0.43683695538135076,
      "MoBERT-N": 0.5362790780437786,
      "MoBERT-min(F/N)": 0.43683695538135076,
      "MoBERT-max(F/N)": 0.5362790780437786,
      "MotionCritic": -4.229795932769775,
      "VeMo (human-opt view)": 0.7878787878787878,
      "VeMo (max entropy view)": 0.6797853309481217,
      "VeMo (min entropy view)": 0.7878787878787878,
      "VeMo (random view)": 0.6797853309481217,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person, standing still with their arms at their sides, seems to wince or twitch, then continues standing."
  },
  "012021": {
    "text": "a person leans forward and uses sweeping motion, sweeping from person's left to right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6871925502439039,
      "Minus Multimodal Distance": -11.015580177307129,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4277240299852565e-05,
      "MoBERT-F": 0.5135986531603436,
      "MoBERT-N": 0.5097369776145266,
      "MoBERT-min(F/N)": 0.5097369776145266,
      "MoBERT-max(F/N)": 0.5135986531603436,
      "MotionCritic": -3.6534171104431152,
      "VeMo (human-opt view)": 0.7977207977207977,
      "VeMo (max entropy view)": 0.7977207977207977,
      "VeMo (min entropy view)": 0.86652977412731,
      "VeMo (random view)": 0.7977207977207977,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person leans forward and uses a sweeping motion, sweeping from the person's left to right."
  },
  "012023": {
    "text": "a person walks while holding a hand rail",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1078964046241508,
      "Minus Multimodal Distance": -4.517414569854736,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.809470920008607e-05,
      "MoBERT-F": 0.3087736162729578,
      "MoBERT-N": 0.4188928429412196,
      "MoBERT-min(F/N)": 0.3087736162729578,
      "MoBERT-max(F/N)": 0.4188928429412196,
      "MotionCritic": -15.447405815124512,
      "VeMo (human-opt view)": 0.017964071856287425,
      "VeMo (max entropy view)": 0.017964071856287425,
      "VeMo (min entropy view)": 0.004080503492634346,
      "VeMo (random view)": 0.017964071856287425,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks while holding a handrail."
  },
  "012032": {
    "text": "a person jogs forward for several seconds.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0704182640602748,
      "Minus Multimodal Distance": -5.150074005126953,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4393950297962874e-05,
      "MoBERT-F": 0.41004610926532026,
      "MoBERT-N": 0.5878024862692578,
      "MoBERT-min(F/N)": 0.41004610926532026,
      "MoBERT-max(F/N)": 0.5878024862692578,
      "MotionCritic": -7.405188083648682,
      "VeMo (human-opt view)": 0.9363519863306279,
      "VeMo (max entropy view)": 0.6793048973143759,
      "VeMo (min entropy view)": 0.9363519863306279,
      "VeMo (random view)": 0.6793048973143759,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person jogs forward for several seconds."
  },
  "012041": {
    "text": "a person is jogging forward at a steady pace.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.144189745313744,
      "Minus Multimodal Distance": -8.559569358825684,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6479174266569316e-05,
      "MoBERT-F": 0.5044333341053766,
      "MoBERT-N": 0.5366979543358819,
      "MoBERT-min(F/N)": 0.5044333341053766,
      "MoBERT-max(F/N)": 0.5366979543358819,
      "MotionCritic": -6.903383255004883,
      "VeMo (human-opt view)": 0.9959422714913198,
      "VeMo (max entropy view)": 0.9896907216494846,
      "VeMo (min entropy view)": 0.9959422714913198,
      "VeMo (random view)": 0.9896907216494846,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is jogging forward at a steady pace."
  },
  "012046": {
    "text": "a person walks on a beam, loses his balance and fall off to his right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3043868023500642,
      "Minus Multimodal Distance": -11.543505668640137,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.639618444140069e-05,
      "MoBERT-F": 0.4608566382353858,
      "MoBERT-N": 0.5378091572956859,
      "MoBERT-min(F/N)": 0.4608566382353858,
      "MoBERT-max(F/N)": 0.5378091572956859,
      "MotionCritic": -4.962840557098389,
      "VeMo (human-opt view)": 0.3924466338259442,
      "VeMo (max entropy view)": 0.3924466338259442,
      "VeMo (min entropy view)": 0.293598233995585,
      "VeMo (random view)": 0.3924466338259442,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks on a beam, loses his balance and falls off to his right."
  },
  "012049": {
    "text": "a person jumps sideways to their right several times, then several times to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3561116436929694,
      "Minus Multimodal Distance": -4.857067108154297,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.06126050115563e-05,
      "MoBERT-F": 0.4886573651886936,
      "MoBERT-N": 0.6943302283717405,
      "MoBERT-min(F/N)": 0.4886573651886936,
      "MoBERT-max(F/N)": 0.6943302283717405,
      "MotionCritic": -5.271360874176025,
      "VeMo (human-opt view)": 0.5920245398773006,
      "VeMo (max entropy view)": 0.5920245398773006,
      "VeMo (min entropy view)": 0.6799116997792495,
      "VeMo (random view)": 0.5920245398773006,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps sideways to their right several times, then several times to the left."
  },
  "012099": {
    "text": "a person lifts something to their face and wobbles their body in circles.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7955972061536308,
      "Minus Multimodal Distance": -2.066288948059082,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3880929802544415e-05,
      "MoBERT-F": 0.39567400716087536,
      "MoBERT-N": 0.43732415149457154,
      "MoBERT-min(F/N)": 0.39567400716087536,
      "MoBERT-max(F/N)": 0.43732415149457154,
      "MotionCritic": -2.1427228450775146,
      "VeMo (human-opt view)": 0.48491879350348027,
      "VeMo (max entropy view)": 0.48491879350348027,
      "VeMo (min entropy view)": 0.03319799379030332,
      "VeMo (random view)": 0.48491879350348027,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts something to their face and wobbles their body in circles."
  },
  "012117": {
    "text": "a person lifting their left arm up",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7535400547342875,
      "Minus Multimodal Distance": -9.819137573242188,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.582346107577905e-05,
      "MoBERT-F": 0.3632992194156035,
      "MoBERT-N": 0.46968020702628727,
      "MoBERT-min(F/N)": 0.3632992194156035,
      "MoBERT-max(F/N)": 0.46968020702628727,
      "MotionCritic": -8.320091247558594,
      "VeMo (human-opt view)": 0.8670520231213873,
      "VeMo (max entropy view)": 0.8670520231213873,
      "VeMo (min entropy view)": 0.9398847104035136,
      "VeMo (random view)": 0.9398847104035136,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is lifting their left arm up."
  },
  "012118": {
    "text": "this person side steps left, stops and side steps right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7384224495772238,
      "Minus Multimodal Distance": -1.9360517263412476,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.454222678556107e-05,
      "MoBERT-F": 0.4574863095617556,
      "MoBERT-N": 0.5904481498867827,
      "MoBERT-min(F/N)": 0.4574863095617556,
      "MoBERT-max(F/N)": 0.5904481498867827,
      "MotionCritic": -5.655721664428711,
      "VeMo (human-opt view)": 0.9754768392370572,
      "VeMo (max entropy view)": 0.960412447063156,
      "VeMo (min entropy view)": 0.9754768392370572,
      "VeMo (random view)": 0.9754768392370572,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person side-steps to the left, stops, and then side-steps to the right."
  },
  "012127": {
    "text": "a man staggers backwards from a standing posture, swinging his arms, before ending in a standing posture.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9570216778091275,
      "Minus Multimodal Distance": -6.079171657562256,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.995886504650116,
      "MoBERT-F": 0.6029012203642647,
      "MoBERT-N": 0.5048297178348495,
      "MoBERT-min(F/N)": 0.5048297178348495,
      "MoBERT-max(F/N)": 0.6029012203642647,
      "MotionCritic": -2.783475875854492,
      "VeMo (human-opt view)": 0.14857142857142858,
      "VeMo (max entropy view)": 0.21195652173913043,
      "VeMo (min entropy view)": 0.14857142857142858,
      "VeMo (random view)": 0.14857142857142858,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man staggers backwards from a standing position, swinging his arms, before ending up in a standing position."
  },
  "012132": {
    "text": "a man does a push up and then uses his arms to balance himself back to his feet.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4385262970169754,
      "Minus Multimodal Distance": -10.328749656677246,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.212321098544635e-05,
      "MoBERT-F": 0.3486508551173122,
      "MoBERT-N": 0.36462932870517634,
      "MoBERT-min(F/N)": 0.3486508551173122,
      "MoBERT-max(F/N)": 0.36462932870517634,
      "MotionCritic": -8.12643814086914,
      "VeMo (human-opt view)": 0.777120315581854,
      "VeMo (max entropy view)": 0.6372980910425844,
      "VeMo (min entropy view)": 0.777120315581854,
      "VeMo (random view)": 0.6372980910425844,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man does a push-up and then uses his arms to balance himself back to his feet."
  },
  "012145": {
    "text": "a person is clapping his hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.546321129689571,
      "Minus Multimodal Distance": -3.00165057182312,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2198055376065895e-05,
      "MoBERT-F": 0.29249400607388776,
      "MoBERT-N": 0.458122378854675,
      "MoBERT-min(F/N)": 0.29249400607388776,
      "MoBERT-max(F/N)": 0.458122378854675,
      "MotionCritic": -10.917404174804688,
      "VeMo (human-opt view)": 0.9940746706552379,
      "VeMo (max entropy view)": 0.9706907153105144,
      "VeMo (min entropy view)": 0.9940746706552379,
      "VeMo (random view)": 0.9940746706552379,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is clapping their hands."
  },
  "012158": {
    "text": "person is walking back and forth",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5833802086237414,
      "Minus Multimodal Distance": -10.39110279083252,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0006406643660739064,
      "MoBERT-F": 0.5191012351026557,
      "MoBERT-N": 0.5347104051658833,
      "MoBERT-min(F/N)": 0.5191012351026557,
      "MoBERT-max(F/N)": 0.5347104051658833,
      "MotionCritic": -10.013452529907227,
      "VeMo (human-opt view)": 0.9975289868846227,
      "VeMo (max entropy view)": 0.9975289868846227,
      "VeMo (min entropy view)": 0.9985905513610702,
      "VeMo (random view)": 0.9975289868846227,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking back and forth."
  },
  "012167": {
    "text": "the right hand flings forward to the left side, the left hand meets the right hand, and is drag back right the shoulder.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0709380872399434,
      "Minus Multimodal Distance": -10.117311477661133,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.7752444768557325e-05,
      "MoBERT-F": 0.45758480142924046,
      "MoBERT-N": 0.4885987868116808,
      "MoBERT-min(F/N)": 0.45758480142924046,
      "MoBERT-max(F/N)": 0.4885987868116808,
      "MotionCritic": -6.870246887207031,
      "VeMo (human-opt view)": 0.7427385892116183,
      "VeMo (max entropy view)": 0.7427385892116183,
      "VeMo (min entropy view)": 0.7776628748707343,
      "VeMo (random view)": 0.7776628748707343,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The right hand flings forward to the left side. The left hand meets the right hand and is dragged back to the right shoulder."
  },
  "012215": {
    "text": "the person is walking slowly like a monster.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2943618125155552,
      "Minus Multimodal Distance": -9.451163291931152,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.111218004254624e-05,
      "MoBERT-F": 0.5143663152849085,
      "MoBERT-N": 0.5685509287064562,
      "MoBERT-min(F/N)": 0.5143663152849085,
      "MoBERT-max(F/N)": 0.5685509287064562,
      "MotionCritic": -6.250760078430176,
      "VeMo (human-opt view)": 0.002799756542909312,
      "VeMo (max entropy view)": 0.01684836471754212,
      "VeMo (min entropy view)": 0.002799756542909312,
      "VeMo (random view)": 0.01684836471754212,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking slowly, like a monster."
  },
  "012235": {
    "text": "a person is doing acting like a little teapot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6298413512296247,
      "Minus Multimodal Distance": -6.855557441711426,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.3068118095397949,
      "MoBERT-F": 0.6525395006740142,
      "MoBERT-N": 0.47889490829563214,
      "MoBERT-min(F/N)": 0.47889490829563214,
      "MoBERT-max(F/N)": 0.6525395006740142,
      "MotionCritic": -1.1276482343673706,
      "VeMo (human-opt view)": 0.3211845102505695,
      "VeMo (max entropy view)": 0.40821917808219177,
      "VeMo (min entropy view)": 0.3211845102505695,
      "VeMo (random view)": 0.40821917808219177,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is acting like a little teapot."
  },
  "012240": {
    "text": "a person bends down to their left side, then takes their left hand and places it on their right arm, and finally bends back down to the left with both hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7748516788795448,
      "Minus Multimodal Distance": -9.370849609375,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4439241315121762e-05,
      "MoBERT-F": 0.40716489328110556,
      "MoBERT-N": 0.5202486210373957,
      "MoBERT-min(F/N)": 0.40716489328110556,
      "MoBERT-max(F/N)": 0.5202486210373957,
      "MotionCritic": -3.7923531532287598,
      "VeMo (human-opt view)": 0.48424068767908307,
      "VeMo (max entropy view)": 0.48424068767908307,
      "VeMo (min entropy view)": 0.6220735785953178,
      "VeMo (random view)": 0.48424068767908307,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends down to their left side, then places their left hand on their right arm, and finally bends down to the left again with both hands."
  },
  "012277": {
    "text": "person walks to pick something up then walks back to wipe something with it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.1755476901211606,
      "Minus Multimodal Distance": -8.723044395446777,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00018328215810470283,
      "MoBERT-F": 0.5069433812816546,
      "MoBERT-N": 0.5654928106612199,
      "MoBERT-min(F/N)": 0.5069433812816546,
      "MoBERT-max(F/N)": 0.5654928106612199,
      "MotionCritic": -9.584342956542969,
      "VeMo (human-opt view)": 0.766248574686431,
      "VeMo (max entropy view)": 0.59245960502693,
      "VeMo (min entropy view)": 0.766248574686431,
      "VeMo (random view)": 0.59245960502693,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks to pick something up, then walks back to wipe something with it."
  },
  "012280": {
    "text": "a person walks forward quickly and then stops",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2315436185499475,
      "Minus Multimodal Distance": -5.7767720222473145,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.634307257831097e-05,
      "MoBERT-F": 0.592466723816684,
      "MoBERT-N": 0.7215518661476528,
      "MoBERT-min(F/N)": 0.592466723816684,
      "MoBERT-max(F/N)": 0.7215518661476528,
      "MotionCritic": -10.872123718261719,
      "VeMo (human-opt view)": 0.9956837262249205,
      "VeMo (max entropy view)": 0.9627263045793397,
      "VeMo (min entropy view)": 0.9956837262249205,
      "VeMo (random view)": 0.9956837262249205,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward quickly and then stops."
  },
  "012282": {
    "text": "a man lowers his arms, then moves something in front of his face.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4922783506729116,
      "Minus Multimodal Distance": -6.785809516906738,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.307746217411477e-05,
      "MoBERT-F": 0.3862355560767764,
      "MoBERT-N": 0.43962537841965704,
      "MoBERT-min(F/N)": 0.3862355560767764,
      "MoBERT-max(F/N)": 0.43962537841965704,
      "MotionCritic": -10.214302062988281,
      "VeMo (human-opt view)": 0.5468164794007491,
      "VeMo (max entropy view)": 0.5468164794007491,
      "VeMo (min entropy view)": 0.2691131498470948,
      "VeMo (random view)": 0.5468164794007491,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man lowers his arms and then moves something in front of his face."
  },
  "012310": {
    "text": "the toon walks forward a few steps, then turns around walking back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9151979708232022,
      "Minus Multimodal Distance": -4.307126998901367,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3374817828880623e-05,
      "MoBERT-F": 0.395209929106274,
      "MoBERT-N": 0.572951763560958,
      "MoBERT-min(F/N)": 0.395209929106274,
      "MoBERT-max(F/N)": 0.572951763560958,
      "MotionCritic": -22.906211853027344,
      "VeMo (human-opt view)": 0.9902956039533116,
      "VeMo (max entropy view)": 0.9323692045937899,
      "VeMo (min entropy view)": 0.9902956039533116,
      "VeMo (random view)": 0.9323692045937899,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The toon walks forward a few steps, then turns around and walks back."
  },
  "012321": {
    "text": "a person throws something and catches something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7876277896873477,
      "Minus Multimodal Distance": -2.6497387886047363,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.24949678778648376,
      "MoBERT-F": 0.6369024649329738,
      "MoBERT-N": 0.533796060308891,
      "MoBERT-min(F/N)": 0.533796060308891,
      "MoBERT-max(F/N)": 0.6369024649329738,
      "MotionCritic": -5.207621097564697,
      "VeMo (human-opt view)": 0.1645274212368728,
      "VeMo (max entropy view)": 0.42203742203742206,
      "VeMo (min entropy view)": 0.1645274212368728,
      "VeMo (random view)": 0.42203742203742206,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws something and catches something."
  },
  "012330": {
    "text": "a person crosses their arms, then has their arms angrily at their sides.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6734796543899597,
      "Minus Multimodal Distance": -10.232940673828125,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4083607058855705e-05,
      "MoBERT-F": 0.3298655937826349,
      "MoBERT-N": 0.5246964975156785,
      "MoBERT-min(F/N)": 0.3298655937826349,
      "MoBERT-max(F/N)": 0.5246964975156785,
      "MotionCritic": -4.7422075271606445,
      "VeMo (human-opt view)": 0.08982035928143713,
      "VeMo (max entropy view)": 0.3780487804878049,
      "VeMo (min entropy view)": 0.08982035928143713,
      "VeMo (random view)": 0.3780487804878049,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person crosses their arms, then puts their arms angrily at their sides."
  },
  "012343": {
    "text": "the sim appears to scoot across the plane.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6572790833552529,
      "Minus Multimodal Distance": -3.9545135498046875,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.891041549446527e-05,
      "MoBERT-F": 0.3294900507509538,
      "MoBERT-N": 0.46286116707551905,
      "MoBERT-min(F/N)": 0.3294900507509538,
      "MoBERT-max(F/N)": 0.46286116707551905,
      "MotionCritic": -5.616911888122559,
      "VeMo (human-opt view)": 0.19148936170212766,
      "VeMo (max entropy view)": 0.45368171021377673,
      "VeMo (min entropy view)": 0.19148936170212766,
      "VeMo (random view)": 0.19148936170212766,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The sim appears to scoot across the plane."
  },
  "012355": {
    "text": "a person is putting both hands to his face.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9965460139085259,
      "Minus Multimodal Distance": -5.039186000823975,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7550768209039234e-05,
      "MoBERT-F": 0.3590487279053868,
      "MoBERT-N": 0.40581372800167814,
      "MoBERT-min(F/N)": 0.3590487279053868,
      "MoBERT-max(F/N)": 0.40581372800167814,
      "MotionCritic": -6.351562976837158,
      "VeMo (human-opt view)": 0.6927835051546392,
      "VeMo (max entropy view)": 0.6927835051546392,
      "VeMo (min entropy view)": 0.8174204355108877,
      "VeMo (random view)": 0.6927835051546392,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is putting both hands on his face."
  },
  "012356": {
    "text": "a person is dancing and starts to take a drink with their left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2867834416858739,
      "Minus Multimodal Distance": -5.382690906524658,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.021441426128149033,
      "MoBERT-F": 0.6066817292704284,
      "MoBERT-N": 0.5731283525890386,
      "MoBERT-min(F/N)": 0.5731283525890386,
      "MoBERT-max(F/N)": 0.6066817292704284,
      "MotionCritic": -7.919688701629639,
      "VeMo (human-opt view)": 0.00014484254197648285,
      "VeMo (max entropy view)": 0.00014484254197648285,
      "VeMo (min entropy view)": 0.00011286049523185307,
      "VeMo (random view)": 0.00014484254197648285,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is dancing and starts to take a drink with their left hand."
  },
  "012361": {
    "text": "a person picks up  a heavy object and moves it right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8283078703428637,
      "Minus Multimodal Distance": -10.62413501739502,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8251622915267944,
      "MoBERT-F": 0.5978941180506143,
      "MoBERT-N": 0.544968333896002,
      "MoBERT-min(F/N)": 0.544968333896002,
      "MoBERT-max(F/N)": 0.5978941180506143,
      "MotionCritic": -10.94205093383789,
      "VeMo (human-opt view)": 0.0008062742798504728,
      "VeMo (max entropy view)": 0.0055455836257936614,
      "VeMo (min entropy view)": 0.0008062742798504728,
      "VeMo (random view)": 0.0008062742798504728,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person picks up a heavy object and moves it to the right."
  },
  "012362": {
    "text": "a person holds their hands together in front of themself.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6148686967268453,
      "Minus Multimodal Distance": -6.4903764724731445,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.06586007354781e-05,
      "MoBERT-F": 0.44623128262224443,
      "MoBERT-N": 0.48927750644373585,
      "MoBERT-min(F/N)": 0.44623128262224443,
      "MoBERT-max(F/N)": 0.48927750644373585,
      "MotionCritic": -4.335371494293213,
      "VeMo (human-opt view)": 0.9959422714913198,
      "VeMo (max entropy view)": 0.6361974405850092,
      "VeMo (min entropy view)": 0.9959422714913198,
      "VeMo (random view)": 0.6361974405850092,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds their hands together in front of themselves."
  },
  "012371": {
    "text": "swaying back-and-forth with their arms out.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9772012678498813,
      "Minus Multimodal Distance": -12.353443145751953,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.543953400570899e-05,
      "MoBERT-F": 0.5250213001513039,
      "MoBERT-N": 0.5585928174557457,
      "MoBERT-min(F/N)": 0.5250213001513039,
      "MoBERT-max(F/N)": 0.5585928174557457,
      "MotionCritic": -8.552173614501953,
      "VeMo (human-opt view)": 0.8523908523908524,
      "VeMo (max entropy view)": 0.8523908523908524,
      "VeMo (min entropy view)": 0.9149828440716736,
      "VeMo (random view)": 0.8523908523908524,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is swaying back and forth with their arms out."
  },
  "012385": {
    "text": "walking forward and then back.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2806611492715845,
      "Minus Multimodal Distance": -9.275651931762695,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2547981643583626e-05,
      "MoBERT-F": 0.40088454544895036,
      "MoBERT-N": 0.48925316785312367,
      "MoBERT-min(F/N)": 0.40088454544895036,
      "MoBERT-max(F/N)": 0.48925316785312367,
      "MotionCritic": -2.09379506111145,
      "VeMo (human-opt view)": 0.9433106575963719,
      "VeMo (max entropy view)": 0.9326705829191143,
      "VeMo (min entropy view)": 0.9433106575963719,
      "VeMo (random view)": 0.9433106575963719,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and then back."
  },
  "012388": {
    "text": "this person ducks under something then climbs up and over it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.3373793147812214,
      "Minus Multimodal Distance": -13.074231147766113,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.41001744143432e-05,
      "MoBERT-F": 0.3584264255872808,
      "MoBERT-N": 0.5249304247843314,
      "MoBERT-min(F/N)": 0.3584264255872808,
      "MoBERT-max(F/N)": 0.5249304247843314,
      "MotionCritic": -6.907431125640869,
      "VeMo (human-opt view)": 0.5160599571734475,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.5160599571734475,
      "VeMo (random view)": 0.5160599571734475,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person ducks under something, then climbs up and over it."
  },
  "012398": {
    "text": "the person stands and squats a couple of times with their hands out in front of them",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9520195192337882,
      "Minus Multimodal Distance": -9.688154220581055,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.749532571528107e-05,
      "MoBERT-F": 0.47445499673504044,
      "MoBERT-N": 0.46355890645138836,
      "MoBERT-min(F/N)": 0.46355890645138836,
      "MoBERT-max(F/N)": 0.47445499673504044,
      "MotionCritic": -5.95367956161499,
      "VeMo (human-opt view)": 0.9961663736200386,
      "VeMo (max entropy view)": 0.9627965043695381,
      "VeMo (min entropy view)": 0.9961663736200386,
      "VeMo (random view)": 0.9627965043695381,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person stands and squats a couple of times with their hands out in front of them."
  },
  "012400": {
    "text": "someone sees something on the ground the they move slowly away from",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.014624326604664,
      "Minus Multimodal Distance": -11.277766227722168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.124666909570806e-05,
      "MoBERT-F": 0.48330840070708536,
      "MoBERT-N": 0.46409140294959744,
      "MoBERT-min(F/N)": 0.46409140294959744,
      "MoBERT-max(F/N)": 0.48330840070708536,
      "MotionCritic": -4.338868618011475,
      "VeMo (human-opt view)": 0.8267942583732057,
      "VeMo (max entropy view)": 0.8267942583732057,
      "VeMo (min entropy view)": 0.9100817438692098,
      "VeMo (random view)": 0.9100817438692098,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone sees something on the ground, then they move slowly away from it."
  },
  "012414": {
    "text": "a person runs diagonally across a room with their arms swinging hands down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.7468534683815948,
      "Minus Multimodal Distance": -7.146199703216553,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.540595414757263e-05,
      "MoBERT-F": 0.40420941089326856,
      "MoBERT-N": 0.5529022061001019,
      "MoBERT-min(F/N)": 0.40420941089326856,
      "MoBERT-max(F/N)": 0.5529022061001019,
      "MotionCritic": -18.427024841308594,
      "VeMo (human-opt view)": 0.899165061014772,
      "VeMo (max entropy view)": 0.8669354838709677,
      "VeMo (min entropy view)": 0.899165061014772,
      "VeMo (random view)": 0.899165061014772,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs diagonally across a room with their arms swinging and their hands down."
  },
  "012423": {
    "text": "a man stands with his arms at his sides, and sways slightly to his left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6045961470652518,
      "Minus Multimodal Distance": -6.45225191116333,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.80727181234397e-05,
      "MoBERT-F": 0.39836733198388663,
      "MoBERT-N": 0.49391373320260007,
      "MoBERT-min(F/N)": 0.39836733198388663,
      "MoBERT-max(F/N)": 0.49391373320260007,
      "MotionCritic": -9.120956420898438,
      "VeMo (human-opt view)": 0.9100817438692098,
      "VeMo (max entropy view)": 0.8436911487758946,
      "VeMo (min entropy view)": 0.9100817438692098,
      "VeMo (random view)": 0.8436911487758946,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man stands with his arms at his sides and sways slightly to his left."
  },
  "012492": {
    "text": "a person stands with legs shoulder-width apart, slightly bent at the knees, arms outstretched at shoulder height, lowers left arm for several seconds, then brings with arm back to shoulder height.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6701332799549794,
      "Minus Multimodal Distance": -5.03062629699707,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7928523195441812e-05,
      "MoBERT-F": 0.2625748074027854,
      "MoBERT-N": 0.4140479727244737,
      "MoBERT-min(F/N)": 0.2625748074027854,
      "MoBERT-max(F/N)": 0.4140479727244737,
      "MotionCritic": -11.550858497619629,
      "VeMo (human-opt view)": 0.8933333333333333,
      "VeMo (max entropy view)": 0.8669354838709677,
      "VeMo (min entropy view)": 0.8933333333333333,
      "VeMo (random view)": 0.8669354838709677,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands with their legs shoulder-width apart, knees slightly bent, arms outstretched at shoulder height. They lower their left arm for several seconds, then bring the arm back to shoulder height."
  },
  "012495": {
    "text": "a person walks quickly forward, moving at a slight angle to the right",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8557786287368092,
      "Minus Multimodal Distance": -5.437824726104736,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2935422748560086e-05,
      "MoBERT-F": 0.4077540896422921,
      "MoBERT-N": 0.5627139751969382,
      "MoBERT-min(F/N)": 0.4077540896422921,
      "MoBERT-max(F/N)": 0.5627139751969382,
      "MotionCritic": -4.234940052032471,
      "VeMo (human-opt view)": 0.8439716312056738,
      "VeMo (max entropy view)": 0.8439716312056738,
      "VeMo (min entropy view)": 0.852394916911046,
      "VeMo (random view)": 0.852394916911046,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks quickly forward, moving at a slight angle to the right."
  },
  "012498": {
    "text": "pretend to hold a ball in your right hand, toss the ball upward and kick it with your right foot as it falls toward your foot. take steps forward and keep going out of the area.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0261361241897489,
      "Minus Multimodal Distance": -5.975230693817139,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.67435607383959e-05,
      "MoBERT-F": 0.5323592678406823,
      "MoBERT-N": 0.5253358415495692,
      "MoBERT-min(F/N)": 0.5253358415495692,
      "MoBERT-max(F/N)": 0.5323592678406823,
      "MotionCritic": -5.65691614151001,
      "VeMo (human-opt view)": 0.20189274447949526,
      "VeMo (max entropy view)": 0.20189274447949526,
      "VeMo (min entropy view)": 0.07563805104408353,
      "VeMo (random view)": 0.20189274447949526,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person pretends to hold a ball in their right hand, toss the ball upward and kick it with their right foot as it falls toward their foot. This person takes steps forward and keeps going out of the area."
  },
  "012515": {
    "text": "a person crosses their arms then drops them.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.42296180940056843,
      "Minus Multimodal Distance": -6.103965759277344,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8988300982746296e-05,
      "MoBERT-F": 0.2987783053583252,
      "MoBERT-N": 0.5173062688496579,
      "MoBERT-min(F/N)": 0.2987783053583252,
      "MoBERT-max(F/N)": 0.5173062688496579,
      "MotionCritic": -5.7614569664001465,
      "VeMo (human-opt view)": 0.9997831774244854,
      "VeMo (max entropy view)": 0.8873642081189251,
      "VeMo (min entropy view)": 0.9997831774244854,
      "VeMo (random view)": 0.8873642081189251,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person crosses their arms and then drops them."
  },
  "012529": {
    "text": "a person is dancing the waltz, going in a counter-clockwise direction with the left arm out.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8592901307485916,
      "Minus Multimodal Distance": -8.06088924407959,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.3204694773303345e-05,
      "MoBERT-F": 0.447344379409514,
      "MoBERT-N": 0.472168529228994,
      "MoBERT-min(F/N)": 0.447344379409514,
      "MoBERT-max(F/N)": 0.472168529228994,
      "MotionCritic": -5.9667205810546875,
      "VeMo (human-opt view)": 0.7191489361702128,
      "VeMo (max entropy view)": 0.6657963446475196,
      "VeMo (min entropy view)": 0.7191489361702128,
      "VeMo (random view)": 0.7191489361702128,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is dancing the waltz, going in a counter-clockwise direction with the left arm out."
  },
  "012543": {
    "text": "this person was forward then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9198196958635203,
      "Minus Multimodal Distance": -8.565886497497559,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7353898985893466e-05,
      "MoBERT-F": 0.5317416429369275,
      "MoBERT-N": 0.607655479628924,
      "MoBERT-min(F/N)": 0.5317416429369275,
      "MoBERT-max(F/N)": 0.607655479628924,
      "MotionCritic": -11.002628326416016,
      "VeMo (human-opt view)": 0.9875655430711611,
      "VeMo (max entropy view)": 0.9197336137399229,
      "VeMo (min entropy view)": 0.9875655430711611,
      "VeMo (random view)": 0.9197336137399229,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person was moving forward and then stopped."
  },
  "012554": {
    "text": "a person slowly walked forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9133534052132304,
      "Minus Multimodal Distance": -12.965923309326172,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4295211915159598e-05,
      "MoBERT-F": 0.48155177619355644,
      "MoBERT-N": 0.5991882298715221,
      "MoBERT-min(F/N)": 0.48155177619355644,
      "MoBERT-max(F/N)": 0.5991882298715221,
      "MotionCritic": -0.7030916810035706,
      "VeMo (human-opt view)": 0.9896907216494846,
      "VeMo (max entropy view)": 0.9850397094132857,
      "VeMo (min entropy view)": 0.9896907216494846,
      "VeMo (random view)": 0.9850397094132857,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly walked forward."
  },
  "012555": {
    "text": "a person kicks their right foot in front of them in a dangling motion and then puts it back on the ground.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1156578213755788,
      "Minus Multimodal Distance": -4.182526588439941,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7556761779123917e-05,
      "MoBERT-F": 0.4095060647059281,
      "MoBERT-N": 0.5889370729479586,
      "MoBERT-min(F/N)": 0.4095060647059281,
      "MoBERT-max(F/N)": 0.5889370729479586,
      "MotionCritic": -6.231192588806152,
      "VeMo (human-opt view)": 0.9197769197769198,
      "VeMo (max entropy view)": 0.9197769197769198,
      "VeMo (min entropy view)": 0.9327046720960137,
      "VeMo (random view)": 0.9327046720960137,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person kicks their right foot forward in a dangling motion and then places it back on the ground."
  },
  "012558": {
    "text": "a person walks forward and then pulls something behind them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.8840974293602384,
      "Minus Multimodal Distance": -7.941183090209961,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.002594300080090761,
      "MoBERT-F": 0.5852766106216764,
      "MoBERT-N": 0.6827551800535662,
      "MoBERT-min(F/N)": 0.5852766106216764,
      "MoBERT-max(F/N)": 0.6827551800535662,
      "MotionCritic": -6.633072376251221,
      "VeMo (human-opt view)": 0.02153140527455727,
      "VeMo (max entropy view)": 0.12605042016806722,
      "VeMo (min entropy view)": 0.02153140527455727,
      "VeMo (random view)": 0.02153140527455727,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and then pulls something behind them."
  },
  "012559": {
    "text": "a person walking side to side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5080083066909664,
      "Minus Multimodal Distance": -6.441540241241455,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.844368181191385e-05,
      "MoBERT-F": 0.5385265119428508,
      "MoBERT-N": 0.6346504157660054,
      "MoBERT-min(F/N)": 0.5385265119428508,
      "MoBERT-max(F/N)": 0.6346504157660054,
      "MotionCritic": -4.872250080108643,
      "VeMo (human-opt view)": 0.9724687829095833,
      "VeMo (max entropy view)": 0.9497242945183263,
      "VeMo (min entropy view)": 0.9724687829095833,
      "VeMo (random view)": 0.9724687829095833,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking side to side."
  },
  "012561": {
    "text": "peron moves forward with arms shoulder height then fully turns arms to the back and moves further forward then turns to side and moves back quickly",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2816810835663528,
      "Minus Multimodal Distance": -3.0625252723693848,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0006377975805662572,
      "MoBERT-F": 0.6220410842233983,
      "MoBERT-N": 0.5698471952008138,
      "MoBERT-min(F/N)": 0.5698471952008138,
      "MoBERT-max(F/N)": 0.6220410842233983,
      "MotionCritic": -6.286585807800293,
      "VeMo (human-opt view)": 0.5935251798561151,
      "VeMo (max entropy view)": 0.5935251798561151,
      "VeMo (min entropy view)": 0.7189384800965019,
      "VeMo (random view)": 0.7189384800965019,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves forward with their arms at shoulder height. Then, they fully turn their arms to the back and move further forward. After that, they turn to the side and move back quickly."
  },
  "012567": {
    "text": "a person rests their hands on their knees while squatting.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6745614085768079,
      "Minus Multimodal Distance": -10.228852272033691,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2771813746658154e-05,
      "MoBERT-F": 0.4060079065247796,
      "MoBERT-N": 0.476234346935828,
      "MoBERT-min(F/N)": 0.4060079065247796,
      "MoBERT-max(F/N)": 0.476234346935828,
      "MotionCritic": -16.454299926757812,
      "VeMo (human-opt view)": 0.9706840390879479,
      "VeMo (max entropy view)": 0.19223300970873786,
      "VeMo (min entropy view)": 0.9706840390879479,
      "VeMo (random view)": 0.9706840390879479,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person rests their hands on their knees while squatting."
  },
  "012568": {
    "text": "the person does a couple of small kicks with his left leg",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3388589415196295,
      "Minus Multimodal Distance": -10.270730972290039,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2791344235884026e-05,
      "MoBERT-F": 0.3644825030412615,
      "MoBERT-N": 0.6017581469968784,
      "MoBERT-min(F/N)": 0.3644825030412615,
      "MoBERT-max(F/N)": 0.6017581469968784,
      "MotionCritic": -10.173173904418945,
      "VeMo (human-opt view)": 0.0011705033164260631,
      "VeMo (max entropy view)": 0.01320786803469191,
      "VeMo (min entropy view)": 0.0011705033164260631,
      "VeMo (random view)": 0.01320786803469191,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person does a couple of small kicks with his left leg."
  },
  "012577": {
    "text": "a person pick up something with his left hand and with his right hand he is patting it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9197106967484449,
      "Minus Multimodal Distance": -9.528250694274902,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.45626342803007e-05,
      "MoBERT-F": 0.3395807098660234,
      "MoBERT-N": 0.4378078586083159,
      "MoBERT-min(F/N)": 0.3395807098660234,
      "MoBERT-max(F/N)": 0.4378078586083159,
      "MotionCritic": -7.57420015335083,
      "VeMo (human-opt view)": 0.014951627088830254,
      "VeMo (max entropy view)": 0.031023592792123352,
      "VeMo (min entropy view)": 0.014951627088830254,
      "VeMo (random view)": 0.031023592792123352,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person picks up something with his left hand and pats it with his right hand."
  },
  "012578": {
    "text": "person person is planting vegetables.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9539533832911382,
      "Minus Multimodal Distance": -4.597081184387207,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.6054050326347351,
      "MoBERT-F": 0.6031922309993507,
      "MoBERT-N": 0.4410832301677612,
      "MoBERT-min(F/N)": 0.4410832301677612,
      "MoBERT-max(F/N)": 0.6031922309993507,
      "MotionCritic": -8.469573974609375,
      "VeMo (human-opt view)": 9.255245410336308e-06,
      "VeMo (max entropy view)": 2.019281557840802e-05,
      "VeMo (min entropy view)": 9.255245410336308e-06,
      "VeMo (random view)": 9.255245410336308e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is planting vegetables."
  },
  "012579": {
    "text": "a person steps forward, then kneels down using their left hand for support.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1046699038330592,
      "Minus Multimodal Distance": -2.88389253616333,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8195570848765783e-05,
      "MoBERT-F": 0.3603199601281859,
      "MoBERT-N": 0.4669943825097789,
      "MoBERT-min(F/N)": 0.3603199601281859,
      "MoBERT-max(F/N)": 0.4669943825097789,
      "MotionCritic": -10.904731750488281,
      "VeMo (human-opt view)": 0.9325952458482579,
      "VeMo (max entropy view)": 0.9284064665127021,
      "VeMo (min entropy view)": 0.9325952458482579,
      "VeMo (random view)": 0.9325952458482579,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person steps forward and then kneels down, using their left hand for support."
  },
  "012584": {
    "text": "a person reaches his left arm straight out",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5333738682965492,
      "Minus Multimodal Distance": -9.139448165893555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7256457542534918e-05,
      "MoBERT-F": 0.35263414010147953,
      "MoBERT-N": 0.5121380942503001,
      "MoBERT-min(F/N)": 0.35263414010147953,
      "MoBERT-max(F/N)": 0.5121380942503001,
      "MotionCritic": -9.257545471191406,
      "VeMo (human-opt view)": 0.955329500221141,
      "VeMo (max entropy view)": 0.955329500221141,
      "VeMo (min entropy view)": 0.9850422718404509,
      "VeMo (random view)": 0.9850422718404509,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person reaches his left arm straight out."
  },
  "012595": {
    "text": "the person is walking straight backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8327763949819417,
      "Minus Multimodal Distance": -3.1988143920898438,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.895715988415759e-05,
      "MoBERT-F": 0.4903513515203656,
      "MoBERT-N": 0.561783718331391,
      "MoBERT-min(F/N)": 0.4903513515203656,
      "MoBERT-max(F/N)": 0.561783718331391,
      "MotionCritic": -8.2223539352417,
      "VeMo (human-opt view)": 0.6797853309481217,
      "VeMo (max entropy view)": 0.6797853309481217,
      "VeMo (min entropy view)": 0.9193934557063048,
      "VeMo (random view)": 0.6797853309481217,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking straight backwards."
  },
  "012601": {
    "text": "the person sat down and crossed his legs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0191971559424384,
      "Minus Multimodal Distance": -7.139037609100342,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3745060389046557e-05,
      "MoBERT-F": 0.38562986968762925,
      "MoBERT-N": 0.5344514714522779,
      "MoBERT-min(F/N)": 0.38562986968762925,
      "MoBERT-max(F/N)": 0.5344514714522779,
      "MotionCritic": -4.628066539764404,
      "VeMo (human-opt view)": 0.6371308016877637,
      "VeMo (max entropy view)": 0.6371308016877637,
      "VeMo (min entropy view)": 0.09551656920077972,
      "VeMo (random view)": 0.09551656920077972,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person sat down and crossed his legs."
  },
  "012602": {
    "text": "a person is repeatedly raising and lowering their forearms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.028916027580624,
      "Minus Multimodal Distance": -3.879709005355835,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8292131901253015e-05,
      "MoBERT-F": 0.3395141890399892,
      "MoBERT-N": 0.4677007912977608,
      "MoBERT-min(F/N)": 0.3395141890399892,
      "MoBERT-max(F/N)": 0.4677007912977608,
      "MotionCritic": -11.084244728088379,
      "VeMo (human-opt view)": 0.8668866886688669,
      "VeMo (max entropy view)": 0.8668866886688669,
      "VeMo (min entropy view)": 0.9146816803811174,
      "VeMo (random view)": 0.8668866886688669,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is repeatedly raising and lowering their forearms."
  },
  "012616": {
    "text": "a person walks in a clockwise circle while bent forward at the waist, and holding their right leg with both hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.346693717899909,
      "Minus Multimodal Distance": -9.250065803527832,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00019679602701216936,
      "MoBERT-F": 0.49165109084990655,
      "MoBERT-N": 0.45402539487002214,
      "MoBERT-min(F/N)": 0.45402539487002214,
      "MoBERT-max(F/N)": 0.49165109084990655,
      "MotionCritic": -8.29666519165039,
      "VeMo (human-opt view)": 0.37756714060031593,
      "VeMo (max entropy view)": 0.5311203319502075,
      "VeMo (min entropy view)": 0.37756714060031593,
      "VeMo (random view)": 0.37756714060031593,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in a clockwise circle while bent forward at the waist and holding their right leg with both hands."
  },
  "012625": {
    "text": "subject kicks out right leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.115657187044513,
      "Minus Multimodal Distance": -8.431695938110352,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3090713511919603e-05,
      "MoBERT-F": 0.41395738981095603,
      "MoBERT-N": 0.5450015748682748,
      "MoBERT-min(F/N)": 0.41395738981095603,
      "MoBERT-max(F/N)": 0.5450015748682748,
      "MotionCritic": -13.614795684814453,
      "VeMo (human-opt view)": 0.989040758527275,
      "VeMo (max entropy view)": 0.9648998822143698,
      "VeMo (min entropy view)": 0.989040758527275,
      "VeMo (random view)": 0.9648998822143698,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Subject kicks out the right leg."
  },
  "012639": {
    "text": "a person has their right hand on their head and walks around in a circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.8741325221924943,
      "Minus Multimodal Distance": -10.637314796447754,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.336760998237878e-05,
      "MoBERT-F": 0.5373779797379173,
      "MoBERT-N": 0.6022383433729348,
      "MoBERT-min(F/N)": 0.5373779797379173,
      "MoBERT-max(F/N)": 0.6022383433729348,
      "MotionCritic": -11.598733901977539,
      "VeMo (human-opt view)": 0.8738229755178908,
      "VeMo (max entropy view)": 0.7878172588832487,
      "VeMo (min entropy view)": 0.8738229755178908,
      "VeMo (random view)": 0.8738229755178908,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person has their right hand on their head and walks around in a circle."
  },
  "012655": {
    "text": "the toon jogs in place.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6535624147734086,
      "Minus Multimodal Distance": -14.241764068603516,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.408615910098888e-05,
      "MoBERT-F": 0.33235222534976855,
      "MoBERT-N": 0.5082756136198489,
      "MoBERT-min(F/N)": 0.33235222534976855,
      "MoBERT-max(F/N)": 0.5082756136198489,
      "MotionCritic": -7.773727893829346,
      "VeMo (human-opt view)": 3.323778603690327e-05,
      "VeMo (max entropy view)": 0.001241354850150736,
      "VeMo (min entropy view)": 3.323778603690327e-05,
      "VeMo (random view)": 3.323778603690327e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The toon jogs in place."
  },
  "012673": {
    "text": "a person slowly walked forward and sat on something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.8947950441715193,
      "Minus Multimodal Distance": -7.674236297607422,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001713754900265485,
      "MoBERT-F": 0.5502049634964059,
      "MoBERT-N": 0.5457838513406691,
      "MoBERT-min(F/N)": 0.5457838513406691,
      "MoBERT-max(F/N)": 0.5502049634964059,
      "MotionCritic": -13.053022384643555,
      "VeMo (human-opt view)": 0.9362928797924474,
      "VeMo (max entropy view)": 0.9362928797924474,
      "VeMo (min entropy view)": 0.979702300405954,
      "VeMo (random view)": 0.979702300405954,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walked forward slowly and sat on something."
  },
  "012675": {
    "text": "person walks with both arms straight forward",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5104473731587376,
      "Minus Multimodal Distance": -8.789253234863281,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9832970540155657e-05,
      "MoBERT-F": 0.4358949739721525,
      "MoBERT-N": 0.5423393453942689,
      "MoBERT-min(F/N)": 0.4358949739721525,
      "MoBERT-max(F/N)": 0.5423393453942689,
      "MotionCritic": -1.3776289224624634,
      "VeMo (human-opt view)": 0.7665647298674821,
      "VeMo (max entropy view)": 0.36316695352839934,
      "VeMo (min entropy view)": 0.7665647298674821,
      "VeMo (random view)": 0.7665647298674821,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks with both arms stretched straight forward."
  },
  "012679": {
    "text": "a person kneels down firstly on his right, then his left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0754964324246439,
      "Minus Multimodal Distance": -7.258932590484619,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.509068144718185e-05,
      "MoBERT-F": 0.38988802897241015,
      "MoBERT-N": 0.5678357270073,
      "MoBERT-min(F/N)": 0.38988802897241015,
      "MoBERT-max(F/N)": 0.5678357270073,
      "MotionCritic": -4.811512470245361,
      "VeMo (human-opt view)": 0.8740987243483084,
      "VeMo (max entropy view)": 0.8740987243483084,
      "VeMo (min entropy view)": 0.9048991354466859,
      "VeMo (random view)": 0.8740987243483084,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person kneels down first on his right knee, then on his left knee."
  },
  "012686": {
    "text": "laying down on face and crawling backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.7058014799599084,
      "Minus Multimodal Distance": -5.348807334899902,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3326761947828345e-05,
      "MoBERT-F": 0.46140267451930617,
      "MoBERT-N": 0.4653952686153783,
      "MoBERT-min(F/N)": 0.46140267451930617,
      "MoBERT-max(F/N)": 0.4653952686153783,
      "MotionCritic": -6.10911226272583,
      "VeMo (human-opt view)": 0.9649667405764967,
      "VeMo (max entropy view)": 0.9578574241827491,
      "VeMo (min entropy view)": 0.9649667405764967,
      "VeMo (random view)": 0.9649667405764967,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is lying down on their face and crawling backwards."
  },
  "012691": {
    "text": "the person took a large side step to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7654578178246638,
      "Minus Multimodal Distance": -11.213812828063965,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.268773798481561e-05,
      "MoBERT-F": 0.5094473092697543,
      "MoBERT-N": 0.6895354961371216,
      "MoBERT-min(F/N)": 0.5094473092697543,
      "MoBERT-max(F/N)": 0.6895354961371216,
      "MotionCritic": -3.9205236434936523,
      "VeMo (human-opt view)": 0.9243840271877655,
      "VeMo (max entropy view)": 0.9243840271877655,
      "VeMo (min entropy view)": 0.9362865736942849,
      "VeMo (random view)": 0.9362865736942849,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person took a large side step to the right."
  },
  "012716": {
    "text": "a person continuously jogs counter clockwise.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -2.435143534135146,
      "Minus Multimodal Distance": -5.976030349731445,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2478185201180167e-05,
      "MoBERT-F": 0.4190438585336631,
      "MoBERT-N": 0.5519096163621944,
      "MoBERT-min(F/N)": 0.4190438585336631,
      "MoBERT-max(F/N)": 0.5519096163621944,
      "MotionCritic": -8.827012062072754,
      "VeMo (human-opt view)": 0.9199839163650985,
      "VeMo (max entropy view)": 0.8806539509536785,
      "VeMo (min entropy view)": 0.9199839163650985,
      "VeMo (random view)": 0.9199839163650985,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person continuously jogs counter - clockwise."
  },
  "012735": {
    "text": "standing person raises right leg and both arms, turns slightly to the left, then returns to original standing position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3370359317942022,
      "Minus Multimodal Distance": -9.708941459655762,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.581513581390027e-05,
      "MoBERT-F": 0.33398604296183737,
      "MoBERT-N": 0.5325210902586792,
      "MoBERT-min(F/N)": 0.33398604296183737,
      "MoBERT-max(F/N)": 0.5325210902586792,
      "MotionCritic": -7.803898334503174,
      "VeMo (human-opt view)": 0.03972563589597028,
      "VeMo (max entropy view)": 0.07588985896574882,
      "VeMo (min entropy view)": 0.03972563589597028,
      "VeMo (random view)": 0.03972563589597028,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A standing person raises their right leg and both arms, turns slightly to the left, then returns to the original standing position."
  },
  "012741": {
    "text": "the man walks and turns right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1178611109120882,
      "Minus Multimodal Distance": -13.350598335266113,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6814428565558046e-05,
      "MoBERT-F": 0.425357687239141,
      "MoBERT-N": 0.5788935173466638,
      "MoBERT-min(F/N)": 0.425357687239141,
      "MoBERT-max(F/N)": 0.5788935173466638,
      "MotionCritic": -3.525498628616333,
      "VeMo (human-opt view)": 0.9724888035828535,
      "VeMo (max entropy view)": 0.9724888035828535,
      "VeMo (min entropy view)": 0.9850317124735729,
      "VeMo (random view)": 0.9850317124735729,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man walks and turns right."
  },
  "012789": {
    "text": "person is throwing and catching a football.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7638922475475916,
      "Minus Multimodal Distance": -11.622861862182617,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.020020196214318275,
      "MoBERT-F": 0.5590912133841208,
      "MoBERT-N": 0.45906149930286194,
      "MoBERT-min(F/N)": 0.45906149930286194,
      "MoBERT-max(F/N)": 0.5590912133841208,
      "MotionCritic": -12.568135261535645,
      "VeMo (human-opt view)": 4.978935888972942e-05,
      "VeMo (max entropy view)": 0.005225780517218277,
      "VeMo (min entropy view)": 4.978935888972942e-05,
      "VeMo (random view)": 0.005225780517218277,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is throwing and catching a football."
  },
  "012791": {
    "text": "a person standing raises his right hand to talk on the phone.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0892134114866499,
      "Minus Multimodal Distance": -10.532214164733887,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4139988454408012e-05,
      "MoBERT-F": 0.3028420315983924,
      "MoBERT-N": 0.44657210029872735,
      "MoBERT-min(F/N)": 0.3028420315983924,
      "MoBERT-max(F/N)": 0.44657210029872735,
      "MotionCritic": -5.5023956298828125,
      "VeMo (human-opt view)": 0.0003904724716907458,
      "VeMo (max entropy view)": 0.00043051549049988146,
      "VeMo (min entropy view)": 0.0003904724716907458,
      "VeMo (random view)": 0.00043051549049988146,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person standing raises his right hand to talk on the phone."
  },
  "012798": {
    "text": "a person holds something above, and swings.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4889820925619615,
      "Minus Multimodal Distance": -6.0016045570373535,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00025257148081436753,
      "MoBERT-F": 0.4887766149922407,
      "MoBERT-N": 0.4006619371189449,
      "MoBERT-min(F/N)": 0.4006619371189449,
      "MoBERT-max(F/N)": 0.4887766149922407,
      "MotionCritic": -9.03231430053711,
      "VeMo (human-opt view)": 0.8736196319018404,
      "VeMo (max entropy view)": 0.8181818181818182,
      "VeMo (min entropy view)": 0.8736196319018404,
      "VeMo (random view)": 0.8181818181818182,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds something above and swings it."
  },
  "012799": {
    "text": "a person is shooting a basketball with both hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8184506176067723,
      "Minus Multimodal Distance": -9.180570602416992,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00021517190907616168,
      "MoBERT-F": 0.5276210437931662,
      "MoBERT-N": 0.43509241616546895,
      "MoBERT-min(F/N)": 0.43509241616546895,
      "MoBERT-max(F/N)": 0.5276210437931662,
      "MotionCritic": -6.1379289627075195,
      "VeMo (human-opt view)": 5.300140534029311e-05,
      "VeMo (max entropy view)": 0.0009078481910863917,
      "VeMo (min entropy view)": 5.300140534029311e-05,
      "VeMo (random view)": 0.0009078481910863917,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is shooting a basketball with both hands."
  },
  "012805": {
    "text": "a man crouches down while quickly walking forward and then stands up straight.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.394434295325188,
      "Minus Multimodal Distance": -5.232781887054443,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.01009899191558361,
      "MoBERT-F": 0.6288608300360492,
      "MoBERT-N": 0.5603818210434388,
      "MoBERT-min(F/N)": 0.5603818210434388,
      "MoBERT-max(F/N)": 0.6288608300360492,
      "MotionCritic": -9.481403350830078,
      "VeMo (human-opt view)": 0.7056019070321812,
      "VeMo (max entropy view)": 0.7056019070321812,
      "VeMo (min entropy view)": 0.8172043010752689,
      "VeMo (random view)": 0.7056019070321812,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man crouches down while quickly walking forward and then stands up straight."
  },
  "012837": {
    "text": "a person raised his hand, and leaned it near to face",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7891676664626502,
      "Minus Multimodal Distance": -6.763676643371582,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.0782874091528356e-05,
      "MoBERT-F": 0.35663179352698066,
      "MoBERT-N": 0.49136277698476344,
      "MoBERT-min(F/N)": 0.35663179352698066,
      "MoBERT-max(F/N)": 0.49136277698476344,
      "MotionCritic": -9.185322761535645,
      "VeMo (human-opt view)": 0.9961923093016444,
      "VeMo (max entropy view)": 0.9924144000670551,
      "VeMo (min entropy view)": 0.9961923093016444,
      "VeMo (random view)": 0.9924144000670551,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raised his hand and leaned it close to his face."
  },
  "012877": {
    "text": "a person start to dance with legs",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7431620915606862,
      "Minus Multimodal Distance": -14.39586353302002,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8199363441672176e-05,
      "MoBERT-F": 0.35824906832374903,
      "MoBERT-N": 0.5528160346804916,
      "MoBERT-min(F/N)": 0.35824906832374903,
      "MoBERT-max(F/N)": 0.5528160346804916,
      "MotionCritic": -6.694211483001709,
      "VeMo (human-opt view)": 0.03962338171832091,
      "VeMo (max entropy view)": 0.085,
      "VeMo (min entropy view)": 0.03962338171832091,
      "VeMo (random view)": 0.03962338171832091,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person starts to dance with their legs."
  },
  "012925": {
    "text": "a person slowly paces back and forth.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6108546374699413,
      "Minus Multimodal Distance": -12.704955101013184,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.22200514376163483,
      "MoBERT-F": 0.7020027190826688,
      "MoBERT-N": 0.66122651579205,
      "MoBERT-min(F/N)": 0.66122651579205,
      "MoBERT-max(F/N)": 0.7020027190826688,
      "MotionCritic": -13.41080093383789,
      "VeMo (human-opt view)": 0.9940936531244902,
      "VeMo (max entropy view)": 0.9929020482660718,
      "VeMo (min entropy view)": 0.9940936531244902,
      "VeMo (random view)": 0.9929020482660718,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly paces back and forth."
  },
  "012941": {
    "text": "person stands still with arms bent forwards, as if gripping some handlebars.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.629217747520154,
      "Minus Multimodal Distance": -9.38712215423584,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4699056666577235e-05,
      "MoBERT-F": 0.390684804181187,
      "MoBERT-N": 0.4778227420276935,
      "MoBERT-min(F/N)": 0.390684804181187,
      "MoBERT-max(F/N)": 0.4778227420276935,
      "MotionCritic": -3.1624972820281982,
      "VeMo (human-opt view)": 0.7880386983289358,
      "VeMo (max entropy view)": 0.4380165289256198,
      "VeMo (min entropy view)": 0.7880386983289358,
      "VeMo (random view)": 0.7880386983289358,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still with arms bent forward, as if they are gripping some handlebars."
  },
  "012956": {
    "text": "person seems to be preparing food",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5833027763063262,
      "Minus Multimodal Distance": -9.940811157226562,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.06679867208004e-05,
      "MoBERT-F": 0.5313819720993945,
      "MoBERT-N": 0.5942598977247389,
      "MoBERT-min(F/N)": 0.5313819720993945,
      "MoBERT-max(F/N)": 0.5942598977247389,
      "MotionCritic": -1.4006266593933105,
      "VeMo (human-opt view)": 0.0008074051681583892,
      "VeMo (max entropy view)": 0.0008074051681583892,
      "VeMo (min entropy view)": 1.8925271672274855e-05,
      "VeMo (random view)": 0.0008074051681583892,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person seems to be preparing food."
  },
  "012968": {
    "text": "this person zig zags forward then stops to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.349238567901686,
      "Minus Multimodal Distance": -6.816256999969482,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9701567888259888,
      "MoBERT-F": 0.8133783733122063,
      "MoBERT-N": 0.8543857070998382,
      "MoBERT-min(F/N)": 0.8133783733122063,
      "MoBERT-max(F/N)": 0.8543857070998382,
      "MotionCritic": -1.6689313650131226,
      "VeMo (human-opt view)": 0.7881773399014779,
      "VeMo (max entropy view)": 0.7881773399014779,
      "VeMo (min entropy view)": 0.8350877192982457,
      "VeMo (random view)": 0.7881773399014779,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person zigzags forward, then stops at the right."
  },
  "012981": {
    "text": "a person waves their arms over their heads.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4290753058503062,
      "Minus Multimodal Distance": -16.125473022460938,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.001539176912046969,
      "MoBERT-F": 0.5190450936637182,
      "MoBERT-N": 0.5248982952292587,
      "MoBERT-min(F/N)": 0.5190450936637182,
      "MoBERT-max(F/N)": 0.5248982952292587,
      "MotionCritic": -6.556694030761719,
      "VeMo (human-opt view)": 0.9933049300060864,
      "VeMo (max entropy view)": 0.9933049300060864,
      "VeMo (min entropy view)": 0.9944397897783532,
      "VeMo (random view)": 0.9933049300060864,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person waves their arms over their head."
  },
  "012992": {
    "text": "the person is making up and down hand gestures.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2812325962174227,
      "Minus Multimodal Distance": -7.525134563446045,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.1571514904499054,
      "MoBERT-F": 0.5501900696707466,
      "MoBERT-N": 0.5860402119913031,
      "MoBERT-min(F/N)": 0.5501900696707466,
      "MoBERT-max(F/N)": 0.5860402119913031,
      "MotionCritic": -9.74422836303711,
      "VeMo (human-opt view)": 0.7875816993464052,
      "VeMo (max entropy view)": 0.717607973421927,
      "VeMo (min entropy view)": 0.7875816993464052,
      "VeMo (random view)": 0.7875816993464052,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is making up - and - down hand gestures."
  },
  "013000": {
    "text": "a person gets pushed to their right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8748975429789344,
      "Minus Multimodal Distance": -10.89824390411377,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.282286004628986e-05,
      "MoBERT-F": 0.3966033861142027,
      "MoBERT-N": 0.5434011363834765,
      "MoBERT-min(F/N)": 0.3966033861142027,
      "MoBERT-max(F/N)": 0.5434011363834765,
      "MotionCritic": -16.53505516052246,
      "VeMo (human-opt view)": 0.7190635451505016,
      "VeMo (max entropy view)": 0.7190635451505016,
      "VeMo (min entropy view)": 0.7427652733118971,
      "VeMo (random view)": 0.7427652733118971,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person gets pushed to their right."
  },
  "013003": {
    "text": "a person, while running quickly, bends down and picks something up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -3.282268795298581,
      "Minus Multimodal Distance": -7.044220447540283,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4998156732181087e-05,
      "MoBERT-F": 0.33319110140280406,
      "MoBERT-N": 0.5200967729194483,
      "MoBERT-min(F/N)": 0.33319110140280406,
      "MoBERT-max(F/N)": 0.5200967729194483,
      "MotionCritic": -11.847949981689453,
      "VeMo (human-opt view)": 0.7544783983140148,
      "VeMo (max entropy view)": 0.48491879350348027,
      "VeMo (min entropy view)": 0.7544783983140148,
      "VeMo (random view)": 0.48491879350348027,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person, while running quickly, bends down and picks something up."
  },
  "013004": {
    "text": "a man swings his right arm behind him and then swings it forward and down toward the left, as if he is brushing something aside,",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6194636984425221,
      "Minus Multimodal Distance": -10.61055850982666,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4792863769107498e-05,
      "MoBERT-F": 0.4619319673291832,
      "MoBERT-N": 0.5161863920454766,
      "MoBERT-min(F/N)": 0.4619319673291832,
      "MoBERT-max(F/N)": 0.5161863920454766,
      "MotionCritic": -4.699406623840332,
      "VeMo (human-opt view)": 0.7181818181818181,
      "VeMo (max entropy view)": 0.7181818181818181,
      "VeMo (min entropy view)": 0.836027713625866,
      "VeMo (random view)": 0.7181818181818181,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man swings his right arm behind him, then swings it forward and down toward the left, as if he is brushing something aside."
  },
  "013023": {
    "text": "person moves i a anticlockwise direction in a circle by sprinting",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.8247711478477333,
      "Minus Multimodal Distance": -8.149106979370117,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3471988242818043e-05,
      "MoBERT-F": 0.4810311264823044,
      "MoBERT-N": 0.5550632221340357,
      "MoBERT-min(F/N)": 0.4810311264823044,
      "MoBERT-max(F/N)": 0.5550632221340357,
      "MotionCritic": -7.924271106719971,
      "VeMo (human-opt view)": 0.7876025524156791,
      "VeMo (max entropy view)": 0.7186147186147186,
      "VeMo (min entropy view)": 0.7876025524156791,
      "VeMo (random view)": 0.7186147186147186,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sprints in a circle in an anticlockwise direction."
  },
  "013046": {
    "text": "the sim is standing still before extending their right arm up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.461633646301936,
      "Minus Multimodal Distance": -11.01250171661377,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.413467336737085e-05,
      "MoBERT-F": 0.3746361731312843,
      "MoBERT-N": 0.5054970677384036,
      "MoBERT-min(F/N)": 0.3746361731312843,
      "MoBERT-max(F/N)": 0.5054970677384036,
      "MotionCritic": -7.839858531951904,
      "VeMo (human-opt view)": 0.859465737514518,
      "VeMo (max entropy view)": 0.859465737514518,
      "VeMo (min entropy view)": 0.8872638634978671,
      "VeMo (random view)": 0.859465737514518,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The Sim is standing still before extending their right arm up."
  },
  "013056": {
    "text": "a figure quickly walks forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0829891953161026,
      "Minus Multimodal Distance": -8.959165573120117,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8075824957340956e-05,
      "MoBERT-F": 0.555486633201703,
      "MoBERT-N": 0.6541819011069981,
      "MoBERT-min(F/N)": 0.555486633201703,
      "MoBERT-max(F/N)": 0.6541819011069981,
      "MotionCritic": -10.633036613464355,
      "VeMo (human-opt view)": 0.8599605522682445,
      "VeMo (max entropy view)": 0.8517241379310345,
      "VeMo (min entropy view)": 0.8599605522682445,
      "VeMo (random view)": 0.8517241379310345,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure walks forward quickly."
  },
  "013068": {
    "text": "the person is walking but struggling with the knee",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.7992911677150216,
      "Minus Multimodal Distance": -7.9677300453186035,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0031884931959211826,
      "MoBERT-F": 0.5856762436071694,
      "MoBERT-N": 0.5677080773346141,
      "MoBERT-min(F/N)": 0.5677080773346141,
      "MoBERT-max(F/N)": 0.5856762436071694,
      "MotionCritic": -8.828210830688477,
      "VeMo (human-opt view)": 0.851520572450805,
      "VeMo (max entropy view)": 0.5920245398773006,
      "VeMo (min entropy view)": 0.851520572450805,
      "VeMo (random view)": 0.851520572450805,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking but struggling with their knee."
  },
  "013088": {
    "text": "the person picks up the bottle of shampoo to wash the dog",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9879435909586199,
      "Minus Multimodal Distance": -6.091144561767578,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5309467673650943e-05,
      "MoBERT-F": 0.38994995684151756,
      "MoBERT-N": 0.47574317186102183,
      "MoBERT-min(F/N)": 0.38994995684151756,
      "MoBERT-max(F/N)": 0.47574317186102183,
      "MotionCritic": -6.288113594055176,
      "VeMo (human-opt view)": 8.40332046151036e-06,
      "VeMo (max entropy view)": 1.1109345810779465e-05,
      "VeMo (min entropy view)": 8.40332046151036e-06,
      "VeMo (random view)": 1.1109345810779465e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person picks up the bottle of shampoo to wash the dog."
  },
  "013112": {
    "text": "a person grabs a small invisible object from his left and places it directly in front of them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5953924088425951,
      "Minus Multimodal Distance": -10.439874649047852,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3258966393768787e-05,
      "MoBERT-F": 0.45043297087864564,
      "MoBERT-N": 0.528379057095516,
      "MoBERT-min(F/N)": 0.45043297087864564,
      "MoBERT-max(F/N)": 0.528379057095516,
      "MotionCritic": -3.624755620956421,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.43873517786561267,
      "VeMo (random view)": 0.5,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person grabs a small invisible object from their left and places it directly in front of them."
  },
  "013130": {
    "text": "a person runs forward with one leg crossing in front of the other repetitively before coming to a stop.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3274275877158785,
      "Minus Multimodal Distance": -8.608756065368652,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0014470978640019894,
      "MoBERT-F": 0.5572353942676114,
      "MoBERT-N": 0.5991367756943287,
      "MoBERT-min(F/N)": 0.5572353942676114,
      "MoBERT-max(F/N)": 0.5991367756943287,
      "MotionCritic": -5.21365213394165,
      "VeMo (human-opt view)": 0.4218487394957983,
      "VeMo (max entropy view)": 0.4218487394957983,
      "VeMo (min entropy view)": 0.1558011049723757,
      "VeMo (random view)": 0.4218487394957983,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs forward with one leg crossing in front of the other repetitively before coming to a stop."
  },
  "013154": {
    "text": "a man steps forward, then brushes something at knee height with his right arm, then steps back and stretches his arms out to either side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1737157325079255,
      "Minus Multimodal Distance": -5.464877605438232,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4940722141764127e-05,
      "MoBERT-F": 0.4275077637866136,
      "MoBERT-N": 0.4675424373946606,
      "MoBERT-min(F/N)": 0.4275077637866136,
      "MoBERT-max(F/N)": 0.4675424373946606,
      "MotionCritic": -2.3871002197265625,
      "VeMo (human-opt view)": 0.039740470397404706,
      "VeMo (max entropy view)": 0.11922663802363051,
      "VeMo (min entropy view)": 0.039740470397404706,
      "VeMo (random view)": 0.039740470397404706,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man steps forward, then brushes something at knee height with his right arm. Then he steps back and stretches his arms out to either side."
  },
  "013160": {
    "text": "the sim appears to walk forward bend slightly grabbing an object with their left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.154276543936509,
      "Minus Multimodal Distance": -6.778549671173096,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3549251636723056e-05,
      "MoBERT-F": 0.4562458534319268,
      "MoBERT-N": 0.45315271043765565,
      "MoBERT-min(F/N)": 0.45315271043765565,
      "MoBERT-max(F/N)": 0.4562458534319268,
      "MotionCritic": -5.882615089416504,
      "VeMo (human-opt view)": 0.08023483365949119,
      "VeMo (max entropy view)": 0.08023483365949119,
      "VeMo (min entropy view)": 0.035175879396984924,
      "VeMo (random view)": 0.08023483365949119,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The Sim appears to walk forward, bend slightly, and grab an object with their left hand."
  },
  "013164": {
    "text": "the person is standing with arms by their side",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8410962499897818,
      "Minus Multimodal Distance": -2.7149298191070557,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2871447072247975e-05,
      "MoBERT-F": 0.42479192378970515,
      "MoBERT-N": 0.4913821102515141,
      "MoBERT-min(F/N)": 0.42479192378970515,
      "MoBERT-max(F/N)": 0.4913821102515141,
      "MotionCritic": -9.921899795532227,
      "VeMo (human-opt view)": 0.9101123595505618,
      "VeMo (max entropy view)": 0.5919003115264797,
      "VeMo (min entropy view)": 0.9101123595505618,
      "VeMo (random view)": 0.9101123595505618,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing with their arms by their sides."
  },
  "013174": {
    "text": "a person walks forward and picks up and moves a heavy object.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5665665697109934,
      "Minus Multimodal Distance": -10.534586906433105,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9970765113830566,
      "MoBERT-F": 0.6588748351981458,
      "MoBERT-N": 0.7016188878722924,
      "MoBERT-min(F/N)": 0.6588748351981458,
      "MoBERT-max(F/N)": 0.7016188878722924,
      "MotionCritic": -7.273092746734619,
      "VeMo (human-opt view)": 0.020350380463634753,
      "VeMo (max entropy view)": 0.020350380463634753,
      "VeMo (min entropy view)": 0.001499443458102974,
      "VeMo (random view)": 0.001499443458102974,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, picks up a heavy object, and moves it."
  },
  "013175": {
    "text": "the person left of the dumbbell over his head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8434824200741625,
      "Minus Multimodal Distance": -2.942683458328247,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.509108708181884e-05,
      "MoBERT-F": 0.37905325197828926,
      "MoBERT-N": 0.5144443415118467,
      "MoBERT-min(F/N)": 0.37905325197828926,
      "MoBERT-max(F/N)": 0.5144443415118467,
      "MotionCritic": -7.43085241317749,
      "VeMo (human-opt view)": 0.020312040035325288,
      "VeMo (max entropy view)": 0.031100872140858975,
      "VeMo (min entropy view)": 0.020312040035325288,
      "VeMo (random view)": 0.020312040035325288,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person lifted the dumbbell over his head."
  },
  "013190": {
    "text": "a person beginning to run in a straight line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0921274886866996,
      "Minus Multimodal Distance": -10.671509742736816,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.402426591492258e-05,
      "MoBERT-F": 0.33092073546259027,
      "MoBERT-N": 0.527059231738187,
      "MoBERT-min(F/N)": 0.33092073546259027,
      "MoBERT-max(F/N)": 0.527059231738187,
      "MotionCritic": -11.346681594848633,
      "VeMo (human-opt view)": 0.9840662842574889,
      "VeMo (max entropy view)": 0.940097449125824,
      "VeMo (min entropy view)": 0.9840662842574889,
      "VeMo (random view)": 0.9840662842574889,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person begins to run in a straight line."
  },
  "013207": {
    "text": "a person sprinting ahead, and then slowing down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6264822282283176,
      "Minus Multimodal Distance": -6.806602954864502,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7970154405920766e-05,
      "MoBERT-F": 0.5292453218515453,
      "MoBERT-N": 0.6015746214800444,
      "MoBERT-min(F/N)": 0.5292453218515453,
      "MoBERT-max(F/N)": 0.6015746214800444,
      "MotionCritic": -7.458406925201416,
      "VeMo (human-opt view)": 0.9808885579548275,
      "VeMo (max entropy view)": 0.9604943721032885,
      "VeMo (min entropy view)": 0.9808885579548275,
      "VeMo (random view)": 0.9604943721032885,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sprints ahead and then slows down."
  },
  "013208": {
    "text": "ther person walks forward and leans down to pick something up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3627548520882444,
      "Minus Multimodal Distance": -10.822732925415039,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.9686623495072126e-05,
      "MoBERT-F": 0.48835893832129373,
      "MoBERT-N": 0.5674473658376613,
      "MoBERT-min(F/N)": 0.48835893832129373,
      "MoBERT-max(F/N)": 0.5674473658376613,
      "MotionCritic": -7.462151050567627,
      "VeMo (human-opt view)": 0.9723435225618632,
      "VeMo (max entropy view)": 0.9362865736942849,
      "VeMo (min entropy view)": 0.9723435225618632,
      "VeMo (random view)": 0.9362865736942849,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walks forward and leans down to pick something up."
  },
  "013224": {
    "text": "the person uses their arms to warm up",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6193101804836225,
      "Minus Multimodal Distance": -11.529729843139648,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00030185127980075777,
      "MoBERT-F": 0.5411880802674331,
      "MoBERT-N": 0.5834893777469313,
      "MoBERT-min(F/N)": 0.5411880802674331,
      "MoBERT-max(F/N)": 0.5834893777469313,
      "MotionCritic": -6.307063579559326,
      "VeMo (human-opt view)": 0.9706693406868889,
      "VeMo (max entropy view)": 0.9706693406868889,
      "VeMo (min entropy view)": 0.9770992366412213,
      "VeMo (random view)": 0.9706693406868889,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person uses their arms to warm up."
  },
  "013253": {
    "text": "a person is bent head over toes jumping and throwing arms wildly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4179293407931888,
      "Minus Multimodal Distance": -4.520299911499023,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.1534421800170094e-05,
      "MoBERT-F": 0.5296149627060754,
      "MoBERT-N": 0.4749516914735906,
      "MoBERT-min(F/N)": 0.4749516914735906,
      "MoBERT-max(F/N)": 0.5296149627060754,
      "MotionCritic": -8.269844055175781,
      "VeMo (human-opt view)": 0.15620827770360482,
      "VeMo (max entropy view)": 0.15620827770360482,
      "VeMo (min entropy view)": 0.017947664063570352,
      "VeMo (random view)": 0.017947664063570352,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is bent head over toes, jumping and throwing their arms wildly."
  },
  "013286": {
    "text": "a person stands in place and then steps sideways to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5476853388812976,
      "Minus Multimodal Distance": -12.159004211425781,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.311259049747605e-05,
      "MoBERT-F": 0.3982338162769836,
      "MoBERT-N": 0.49698869112040917,
      "MoBERT-min(F/N)": 0.3982338162769836,
      "MoBERT-max(F/N)": 0.49698869112040917,
      "MotionCritic": -17.75835418701172,
      "VeMo (human-opt view)": 0.9741496598639455,
      "VeMo (max entropy view)": 0.9497716894977168,
      "VeMo (min entropy view)": 0.9741496598639455,
      "VeMo (random view)": 0.9741496598639455,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands in place and then steps sideways to the left."
  },
  "013289": {
    "text": "a person walks counterclockwise in a large curve while swinging their arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.514069189063413,
      "Minus Multimodal Distance": -16.067293167114258,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.612189150066115e-05,
      "MoBERT-F": 0.5821094960309385,
      "MoBERT-N": 0.5549640136454006,
      "MoBERT-min(F/N)": 0.5549640136454006,
      "MoBERT-max(F/N)": 0.5821094960309385,
      "MotionCritic": -9.138373374938965,
      "VeMo (human-opt view)": 0.6232686980609419,
      "VeMo (max entropy view)": 0.6232686980609419,
      "VeMo (min entropy view)": 0.6374695863746959,
      "VeMo (random view)": 0.6232686980609419,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks counterclockwise in a large curve while swinging their arms."
  },
  "013314": {
    "text": "a person stretches his shoulders and arms with help from the opposite hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.917543325785743,
      "Minus Multimodal Distance": -9.689671516418457,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4329066693317145e-05,
      "MoBERT-F": 0.3811351116884504,
      "MoBERT-N": 0.5029863324227082,
      "MoBERT-min(F/N)": 0.3811351116884504,
      "MoBERT-max(F/N)": 0.5029863324227082,
      "MotionCritic": -3.6619138717651367,
      "VeMo (human-opt view)": 0.9243316719528772,
      "VeMo (max entropy view)": 0.9243316719528772,
      "VeMo (min entropy view)": 0.9626955475330926,
      "VeMo (random view)": 0.9243316719528772,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stretches his shoulders and arms with the help of the opposite hands."
  },
  "013315": {
    "text": "the man is boxing upwards",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3298527247364444,
      "Minus Multimodal Distance": -8.432743072509766,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.364256579312496e-05,
      "MoBERT-F": 0.3829455640342193,
      "MoBERT-N": 0.46139501567277275,
      "MoBERT-min(F/N)": 0.3829455640342193,
      "MoBERT-max(F/N)": 0.46139501567277275,
      "MotionCritic": -9.67402458190918,
      "VeMo (human-opt view)": 0.024493392070484583,
      "VeMo (max entropy view)": 0.1734052111410602,
      "VeMo (min entropy view)": 0.024493392070484583,
      "VeMo (random view)": 0.024493392070484583,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man is boxing upwards."
  },
  "013316": {
    "text": "move the both the hand and after hold the legs.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5768043530241324,
      "Minus Multimodal Distance": -8.43952465057373,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.576072776922956e-05,
      "MoBERT-F": 0.4430810358859595,
      "MoBERT-N": 0.5036486239196869,
      "MoBERT-min(F/N)": 0.4430810358859595,
      "MoBERT-max(F/N)": 0.5036486239196869,
      "MotionCritic": -14.415226936340332,
      "VeMo (human-opt view)": 0.53125,
      "VeMo (max entropy view)": 0.53125,
      "VeMo (min entropy view)": 0.5769230769230769,
      "VeMo (random view)": 0.5769230769230769,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves both hands and then holds the legs."
  },
  "013343": {
    "text": "he walked over a high object which was in his way.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1433644320865017,
      "Minus Multimodal Distance": -7.2932233810424805,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2558950149687007e-05,
      "MoBERT-F": 0.33578371717416733,
      "MoBERT-N": 0.4866383410278385,
      "MoBERT-min(F/N)": 0.33578371717416733,
      "MoBERT-max(F/N)": 0.4866383410278385,
      "MotionCritic": -9.238290786743164,
      "VeMo (human-opt view)": 4.402839801307431e-05,
      "VeMo (max entropy view)": 0.0038340486409155937,
      "VeMo (min entropy view)": 4.402839801307431e-05,
      "VeMo (random view)": 0.0038340486409155937,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "He walked over a high object which was in his way."
  },
  "013344": {
    "text": "in a fighting stance, person punches downward with their left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1806569462480248,
      "Minus Multimodal Distance": -9.607111930847168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.385356831131503e-05,
      "MoBERT-F": 0.48206623500115925,
      "MoBERT-N": 0.49620726671541165,
      "MoBERT-min(F/N)": 0.48206623500115925,
      "MoBERT-max(F/N)": 0.49620726671541165,
      "MotionCritic": -4.931843280792236,
      "VeMo (human-opt view)": 0.6224188790560472,
      "VeMo (max entropy view)": 0.6224188790560472,
      "VeMo (min entropy view)": 0.6792452830188679,
      "VeMo (random view)": 0.6792452830188679,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "In a fighting stance, a person punches downward with their left hand."
  },
  "013371": {
    "text": "a person swings something with both hands",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2790285490821078,
      "Minus Multimodal Distance": -10.16442584991455,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.145674756728113e-05,
      "MoBERT-F": 0.4700926829159766,
      "MoBERT-N": 0.4738401403136318,
      "MoBERT-min(F/N)": 0.4700926829159766,
      "MoBERT-max(F/N)": 0.4738401403136318,
      "MotionCritic": -5.635588645935059,
      "VeMo (human-opt view)": 0.7180451127819549,
      "VeMo (max entropy view)": 0.5918918918918918,
      "VeMo (min entropy view)": 0.7180451127819549,
      "VeMo (random view)": 0.5918918918918918,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person swings something with both hands."
  },
  "013382": {
    "text": "he turns left then stops",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5978129156871466,
      "Minus Multimodal Distance": -9.016215324401855,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.0797465519281104e-05,
      "MoBERT-F": 0.5362688672107594,
      "MoBERT-N": 0.704054214481839,
      "MoBERT-min(F/N)": 0.5362688672107594,
      "MoBERT-max(F/N)": 0.704054214481839,
      "MotionCritic": -12.035501480102539,
      "VeMo (human-opt view)": 0.9285389167045972,
      "VeMo (max entropy view)": 0.9285389167045972,
      "VeMo (min entropy view)": 0.9647812166488794,
      "VeMo (random view)": 0.9285389167045972,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "He turns left then stops."
  },
  "013389": {
    "text": "person is performing golf motion",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.325102339439493,
      "Minus Multimodal Distance": -13.079052925109863,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3942293410073034e-05,
      "MoBERT-F": 0.43548270391397625,
      "MoBERT-N": 0.5238348213954704,
      "MoBERT-min(F/N)": 0.43548270391397625,
      "MoBERT-max(F/N)": 0.5238348213954704,
      "MotionCritic": -8.039069175720215,
      "VeMo (human-opt view)": 0.7060998151571165,
      "VeMo (max entropy view)": 0.5461847389558233,
      "VeMo (min entropy view)": 0.7060998151571165,
      "VeMo (random view)": 0.7060998151571165,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is performing a golf motion."
  },
  "013404": {
    "text": "a figure boxes right to punch twice.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2244421677668804,
      "Minus Multimodal Distance": -3.961245536804199,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2342963347909972e-05,
      "MoBERT-F": 0.41336643829819564,
      "MoBERT-N": 0.4610947923646022,
      "MoBERT-min(F/N)": 0.41336643829819564,
      "MoBERT-max(F/N)": 0.4610947923646022,
      "MotionCritic": -6.857484340667725,
      "VeMo (human-opt view)": 0.7665647298674821,
      "VeMo (max entropy view)": 0.7306122448979592,
      "VeMo (min entropy view)": 0.7665647298674821,
      "VeMo (random view)": 0.7306122448979592,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure boxes right to punch twice."
  },
  "013411": {
    "text": "a man movea his right hand as if to wipe down a tabletop.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.81042498782929,
      "Minus Multimodal Distance": -7.515628337860107,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4236365788965486e-05,
      "MoBERT-F": 0.3293623047269466,
      "MoBERT-N": 0.47676739028438275,
      "MoBERT-min(F/N)": 0.3293623047269466,
      "MoBERT-max(F/N)": 0.47676739028438275,
      "MotionCritic": -8.373988151550293,
      "VeMo (human-opt view)": 0.7666034155597723,
      "VeMo (max entropy view)": 0.7666034155597723,
      "VeMo (min entropy view)": 0.8596112311015118,
      "VeMo (random view)": 0.7666034155597723,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man moves his right hand as if to wipe down a tabletop."
  },
  "013419": {
    "text": "this person jumps up and down on his left leg.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1719611832941403,
      "Minus Multimodal Distance": -9.459990501403809,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.009631178341805935,
      "MoBERT-F": 0.5949276479288267,
      "MoBERT-N": 0.7351713637072088,
      "MoBERT-min(F/N)": 0.5949276479288267,
      "MoBERT-max(F/N)": 0.7351713637072088,
      "MotionCritic": -3.9804887771606445,
      "VeMo (human-opt view)": 0.21180555555555555,
      "VeMo (max entropy view)": 0.21180555555555555,
      "VeMo (min entropy view)": 0.1330909090909091,
      "VeMo (random view)": 0.1330909090909091,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person jumps up and down on his left leg."
  },
  "013444": {
    "text": "a person bends over and flaps his arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4908112754386025,
      "Minus Multimodal Distance": -10.07799243927002,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00022033498680684716,
      "MoBERT-F": 0.5398720413272775,
      "MoBERT-N": 0.5420426363995821,
      "MoBERT-min(F/N)": 0.5398720413272775,
      "MoBERT-max(F/N)": 0.5420426363995821,
      "MotionCritic": -15.819037437438965,
      "VeMo (human-opt view)": 0.5311203319502075,
      "VeMo (max entropy view)": 0.5154394299287411,
      "VeMo (min entropy view)": 0.5311203319502075,
      "VeMo (random view)": 0.5311203319502075,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends over and flaps his arms."
  },
  "013449": {
    "text": "a person is boxing, jabbing mostly with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2992194616102177,
      "Minus Multimodal Distance": -13.301637649536133,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.246714757347945e-05,
      "MoBERT-F": 0.37586950901056504,
      "MoBERT-N": 0.44042550710191714,
      "MoBERT-min(F/N)": 0.37586950901056504,
      "MoBERT-max(F/N)": 0.44042550710191714,
      "MotionCritic": -14.383001327514648,
      "VeMo (human-opt view)": 0.7880386983289358,
      "VeMo (max entropy view)": 0.7880386983289358,
      "VeMo (min entropy view)": 0.8931572629051621,
      "VeMo (random view)": 0.7880386983289358,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is boxing, jabbing mostly with their right hand."
  },
  "013468": {
    "text": "a person has his legs spread and arms crossed.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3778865633890869,
      "Minus Multimodal Distance": -9.020864486694336,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4234330339822918e-05,
      "MoBERT-F": 0.38018185387073955,
      "MoBERT-N": 0.5172426394899152,
      "MoBERT-min(F/N)": 0.38018185387073955,
      "MoBERT-max(F/N)": 0.5172426394899152,
      "MotionCritic": -8.183328628540039,
      "VeMo (human-opt view)": 0.08536585365853659,
      "VeMo (max entropy view)": 0.23404255319148937,
      "VeMo (min entropy view)": 0.08536585365853659,
      "VeMo (random view)": 0.08536585365853659,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person has their legs spread and arms crossed."
  },
  "013469": {
    "text": "person has arms out before lowering and running quickly on the spot",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7001706649431002,
      "Minus Multimodal Distance": -9.861793518066406,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8620543162105605e-05,
      "MoBERT-F": 0.46437057587107244,
      "MoBERT-N": 0.47073126669699134,
      "MoBERT-min(F/N)": 0.46437057587107244,
      "MoBERT-max(F/N)": 0.47073126669699134,
      "MotionCritic": -4.748819828033447,
      "VeMo (human-opt view)": 0.9362928797924474,
      "VeMo (max entropy view)": 0.8597701149425288,
      "VeMo (min entropy view)": 0.9362928797924474,
      "VeMo (random view)": 0.9362928797924474,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person has their arms out before lowering them and running quickly on the spot."
  },
  "013471": {
    "text": "a person kicked with right leg",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.018917856738725,
      "Minus Multimodal Distance": -11.500792503356934,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0004452441062312573,
      "MoBERT-F": 0.6208152356231545,
      "MoBERT-N": 0.7107809331898975,
      "MoBERT-min(F/N)": 0.6208152356231545,
      "MoBERT-max(F/N)": 0.7107809331898975,
      "MotionCritic": -3.787785291671753,
      "VeMo (human-opt view)": 0.665314401622718,
      "VeMo (max entropy view)": 0.6501128668171557,
      "VeMo (min entropy view)": 0.665314401622718,
      "VeMo (random view)": 0.665314401622718,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person kicked with the right leg."
  },
  "013474": {
    "text": "the person put his hands on his knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7494211818393436,
      "Minus Multimodal Distance": -12.751917839050293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.432184737699572e-05,
      "MoBERT-F": 0.39278365442081087,
      "MoBERT-N": 0.47287522222620443,
      "MoBERT-min(F/N)": 0.39278365442081087,
      "MoBERT-max(F/N)": 0.47287522222620443,
      "MotionCritic": -7.557065963745117,
      "VeMo (human-opt view)": 0.4375,
      "VeMo (max entropy view)": 0.4375,
      "VeMo (min entropy view)": 0.43672456575682383,
      "VeMo (random view)": 0.43672456575682383,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person put his hands on his knees."
  },
  "013475": {
    "text": "the person balances on their right toe, then switches to the left and spins in a circle",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.417749520792401,
      "Minus Multimodal Distance": -11.661402702331543,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3078013327904046e-05,
      "MoBERT-F": 0.43521321489070103,
      "MoBERT-N": 0.5931439920712539,
      "MoBERT-min(F/N)": 0.43521321489070103,
      "MoBERT-max(F/N)": 0.5931439920712539,
      "MotionCritic": -7.261866092681885,
      "VeMo (human-opt view)": 0.03319333423655331,
      "VeMo (max entropy view)": 0.03968410661401777,
      "VeMo (min entropy view)": 0.03319333423655331,
      "VeMo (random view)": 0.03968410661401777,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person balances on their right toe, then switches to the left and spins in a circle."
  },
  "013511": {
    "text": "stick figure starts running in place",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.51765114988741,
      "Minus Multimodal Distance": -11.138221740722656,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9919257760047913,
      "MoBERT-F": 0.8639066162908458,
      "MoBERT-N": 0.7871418676546846,
      "MoBERT-min(F/N)": 0.7871418676546846,
      "MoBERT-max(F/N)": 0.8639066162908458,
      "MotionCritic": -11.849018096923828,
      "VeMo (human-opt view)": 0.6510538641686182,
      "VeMo (max entropy view)": 0.5314401622718052,
      "VeMo (min entropy view)": 0.6510538641686182,
      "VeMo (random view)": 0.5314401622718052,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person starts running in place."
  },
  "013512": {
    "text": "while squatting, a person makes circular gestures with their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2047905124827645,
      "Minus Multimodal Distance": -9.81195068359375,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2283276848611422e-05,
      "MoBERT-F": 0.26457753250470034,
      "MoBERT-N": 0.3839603536913777,
      "MoBERT-min(F/N)": 0.26457753250470034,
      "MoBERT-max(F/N)": 0.3839603536913777,
      "MotionCritic": -13.163091659545898,
      "VeMo (human-opt view)": 0.6785714285714286,
      "VeMo (max entropy view)": 0.6376146788990825,
      "VeMo (min entropy view)": 0.6785714285714286,
      "VeMo (random view)": 0.6785714285714286,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "While squatting, a person makes circular gestures with their right hand."
  },
  "013543": {
    "text": "a person made a micro move to bent the knee",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2219666806438754,
      "Minus Multimodal Distance": -4.177824974060059,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3892365788924508e-05,
      "MoBERT-F": 0.37830180890766607,
      "MoBERT-N": 0.49920264499917344,
      "MoBERT-min(F/N)": 0.37830180890766607,
      "MoBERT-max(F/N)": 0.49920264499917344,
      "MotionCritic": -10.459564208984375,
      "VeMo (human-opt view)": 0.4691358024691358,
      "VeMo (max entropy view)": 0.4691358024691358,
      "VeMo (min entropy view)": 0.14007782101167315,
      "VeMo (random view)": 0.4691358024691358,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person made a micromove to bend the knee."
  },
  "013558": {
    "text": "a person is pulling something and then walks and puts their arms out",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.212904762365946,
      "Minus Multimodal Distance": -6.6933369636535645,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6064193662023172e-05,
      "MoBERT-F": 0.2693546447395857,
      "MoBERT-N": 0.416300773562336,
      "MoBERT-min(F/N)": 0.2693546447395857,
      "MoBERT-max(F/N)": 0.416300773562336,
      "MotionCritic": -7.951362133026123,
      "VeMo (human-opt view)": 0.6934673366834171,
      "VeMo (max entropy view)": 0.6934673366834171,
      "VeMo (min entropy view)": 0.7776628748707343,
      "VeMo (random view)": 0.7776628748707343,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is pulling something and then walking and putting their arms out."
  },
  "013567": {
    "text": "person is performing dance moves",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5669258791504679,
      "Minus Multimodal Distance": -12.004351615905762,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7283837795257568,
      "MoBERT-F": 0.7810430982862868,
      "MoBERT-N": 0.6877277098233007,
      "MoBERT-min(F/N)": 0.6877277098233007,
      "MoBERT-max(F/N)": 0.7810430982862868,
      "MotionCritic": -9.363446235656738,
      "VeMo (human-opt view)": 0.9364548494983278,
      "VeMo (max entropy view)": 0.9364548494983278,
      "VeMo (min entropy view)": 0.9740895089690161,
      "VeMo (random view)": 0.9364548494983278,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is performing dance moves."
  },
  "013568": {
    "text": "a person jumps repeatedly, waving their hands in circles.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6766329015294894,
      "Minus Multimodal Distance": -7.870542049407959,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.36987870931625366,
      "MoBERT-F": 0.6861141951556256,
      "MoBERT-N": 0.690098614159963,
      "MoBERT-min(F/N)": 0.6861141951556256,
      "MoBERT-max(F/N)": 0.690098614159963,
      "MotionCritic": -12.138957023620605,
      "VeMo (human-opt view)": 3.887438442412264e-05,
      "VeMo (max entropy view)": 0.00017933645511607053,
      "VeMo (min entropy view)": 3.887438442412264e-05,
      "VeMo (random view)": 3.887438442412264e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps repeatedly, waving their hands in circles."
  },
  "013586": {
    "text": "a person takes a large hop forward with both legs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1295416422320301,
      "Minus Multimodal Distance": -3.979471206665039,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.977717485919129e-05,
      "MoBERT-F": 0.6069897663628397,
      "MoBERT-N": 0.686421639188665,
      "MoBERT-min(F/N)": 0.6069897663628397,
      "MoBERT-max(F/N)": 0.686421639188665,
      "MotionCritic": -11.058051109313965,
      "VeMo (human-opt view)": 0.37777777777777777,
      "VeMo (max entropy view)": 0.37777777777777777,
      "VeMo (min entropy view)": 0.23387872954764197,
      "VeMo (random view)": 0.37777777777777777,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes a large hop forward with both legs."
  },
  "013619": {
    "text": "a man swings his right arm back repeatedly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9697131758053725,
      "Minus Multimodal Distance": -8.348357200622559,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.4908487072680146e-05,
      "MoBERT-F": 0.4934565281764711,
      "MoBERT-N": 0.546993571743783,
      "MoBERT-min(F/N)": 0.4934565281764711,
      "MoBERT-max(F/N)": 0.546993571743783,
      "MotionCritic": -16.451383590698242,
      "VeMo (human-opt view)": 0.8075117370892019,
      "VeMo (max entropy view)": 0.8075117370892019,
      "VeMo (min entropy view)": 0.8171206225680934,
      "VeMo (random view)": 0.8171206225680934,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man swings his right arm back repeatedly."
  },
  "013632": {
    "text": "a person shakes arms to lossen up then walks backwads, then moves forward with the right arm in a swiming motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8362973243831143,
      "Minus Multimodal Distance": -7.269341945648193,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.292429235239979e-05,
      "MoBERT-F": 0.3859391215550408,
      "MoBERT-N": 0.46883860525953264,
      "MoBERT-min(F/N)": 0.3859391215550408,
      "MoBERT-max(F/N)": 0.46883860525953264,
      "MotionCritic": -7.811110019683838,
      "VeMo (human-opt view)": 0.6225352112676056,
      "VeMo (max entropy view)": 0.6225352112676056,
      "VeMo (min entropy view)": 0.8442437923250564,
      "VeMo (random view)": 0.8442437923250564,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person shakes their arms to loosen up, then walks backward, then moves forward with the right arm in a swimming motion."
  },
  "013633": {
    "text": "a person walks forward after appearing to lift something with their right hand and swinging their right arm slightly whilst walking.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9782782046257781,
      "Minus Multimodal Distance": -12.321256637573242,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00026454462204128504,
      "MoBERT-F": 0.4772816892743737,
      "MoBERT-N": 0.4592035715317079,
      "MoBERT-min(F/N)": 0.4592035715317079,
      "MoBERT-max(F/N)": 0.4772816892743737,
      "MotionCritic": -4.352848529815674,
      "VeMo (human-opt view)": 0.7773722627737226,
      "VeMo (max entropy view)": 0.7304015296367112,
      "VeMo (min entropy view)": 0.7773722627737226,
      "VeMo (random view)": 0.7773722627737226,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward after appearing to lift something with their right hand and swinging their right arm slightly while walking."
  },
  "013638": {
    "text": "a person grabbed the leg and did something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2615176516166735,
      "Minus Multimodal Distance": -6.800962448120117,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.829015258816071e-05,
      "MoBERT-F": 0.46496592363054556,
      "MoBERT-N": 0.5626380360429053,
      "MoBERT-min(F/N)": 0.46496592363054556,
      "MoBERT-max(F/N)": 0.5626380360429053,
      "MotionCritic": -7.077602386474609,
      "VeMo (human-opt view)": 0.02040816326530612,
      "VeMo (max entropy view)": 0.02040816326530612,
      "VeMo (min entropy view)": 0.012398233384770925,
      "VeMo (random view)": 0.02040816326530612,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person grabbed the leg and did something."
  },
  "013641": {
    "text": "person bends knees then slightly jumps into the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7505990499849977,
      "Minus Multimodal Distance": -4.788217067718506,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9734953045845032,
      "MoBERT-F": 0.8038836547766941,
      "MoBERT-N": 0.7191873056144349,
      "MoBERT-min(F/N)": 0.7191873056144349,
      "MoBERT-max(F/N)": 0.8038836547766941,
      "MotionCritic": -4.813543796539307,
      "VeMo (human-opt view)": 0.6373937677053825,
      "VeMo (max entropy view)": 0.6373937677053825,
      "VeMo (min entropy view)": 0.09497206703910614,
      "VeMo (random view)": 0.09497206703910614,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person bends their knees and then slightly jumps into the air."
  },
  "013646": {
    "text": "walking from side to side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.438599280034703,
      "Minus Multimodal Distance": -15.43873405456543,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.6033598184585571,
      "MoBERT-F": 0.7837729540510138,
      "MoBERT-N": 0.7442189783710468,
      "MoBERT-min(F/N)": 0.7442189783710468,
      "MoBERT-max(F/N)": 0.7837729540510138,
      "MotionCritic": -6.575911998748779,
      "VeMo (human-opt view)": 0.9149736644093304,
      "VeMo (max entropy view)": 0.8741463414634146,
      "VeMo (min entropy view)": 0.9149736644093304,
      "VeMo (random view)": 0.8741463414634146,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking from side to side."
  },
  "013647": {
    "text": "the person is pacing back-and-forth.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2923275948412198,
      "Minus Multimodal Distance": -10.701013565063477,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.031252939254045486,
      "MoBERT-F": 0.6692341203337013,
      "MoBERT-N": 0.6485571791702223,
      "MoBERT-min(F/N)": 0.6485571791702223,
      "MoBERT-max(F/N)": 0.6692341203337013,
      "MotionCritic": -7.712718963623047,
      "VeMo (human-opt view)": 0.9902956039533116,
      "VeMo (max entropy view)": 0.9902956039533116,
      "VeMo (min entropy view)": 0.9908640769928168,
      "VeMo (random view)": 0.9908640769928168,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is pacing back and forth."
  },
  "013648": {
    "text": "moving hands from side to side above head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8858893841614968,
      "Minus Multimodal Distance": -4.705446720123291,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.545280585763976e-05,
      "MoBERT-F": 0.3895509242150227,
      "MoBERT-N": 0.5061610087083029,
      "MoBERT-min(F/N)": 0.3895509242150227,
      "MoBERT-max(F/N)": 0.5061610087083029,
      "MotionCritic": -5.36178731918335,
      "VeMo (human-opt view)": 0.00015807812294360582,
      "VeMo (max entropy view)": 0.0008058017727639,
      "VeMo (min entropy view)": 0.00015807812294360582,
      "VeMo (random view)": 0.00015807812294360582,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is moving hands from side to side above the head."
  },
  "013654": {
    "text": "the person looks as if they are strumming a guitar",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.42707861750151843,
      "Minus Multimodal Distance": -9.652979850769043,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6592799258651212e-05,
      "MoBERT-F": 0.3718078933336704,
      "MoBERT-N": 0.39649261270388536,
      "MoBERT-min(F/N)": 0.3718078933336704,
      "MoBERT-max(F/N)": 0.39649261270388536,
      "MotionCritic": -12.104920387268066,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.42355889724310775,
      "VeMo (random view)": 0.5,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person looks as if they are strumming a guitar."
  },
  "013665": {
    "text": "a person walks forward with exaggerated backward kicks with every step.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3045496602932722,
      "Minus Multimodal Distance": -6.171050548553467,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.472946991678327e-05,
      "MoBERT-F": 0.44725477822807586,
      "MoBERT-N": 0.5424048794543541,
      "MoBERT-min(F/N)": 0.44725477822807586,
      "MoBERT-max(F/N)": 0.5424048794543541,
      "MotionCritic": -10.561851501464844,
      "VeMo (human-opt view)": 0.5460526315789473,
      "VeMo (max entropy view)": 0.5460526315789473,
      "VeMo (min entropy view)": 0.7431906614785992,
      "VeMo (random view)": 0.7431906614785992,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, taking each step with exaggerated backward kicks."
  },
  "013703": {
    "text": "this person is stretching his right leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2197808650600483,
      "Minus Multimodal Distance": -6.815793991088867,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.320278508705087e-05,
      "MoBERT-F": 0.47502113832682424,
      "MoBERT-N": 0.5216063347862903,
      "MoBERT-min(F/N)": 0.47502113832682424,
      "MoBERT-max(F/N)": 0.5216063347862903,
      "MotionCritic": -2.9657747745513916,
      "VeMo (human-opt view)": 0.9496290511518938,
      "VeMo (max entropy view)": 0.9496290511518938,
      "VeMo (min entropy view)": 0.982010582010582,
      "VeMo (random view)": 0.982010582010582,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person is stretching his right leg."
  },
  "013728": {
    "text": "a person with its hand raised, gets down on to its knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0826146181767515,
      "Minus Multimodal Distance": -8.678881645202637,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 9.007161133922637e-05,
      "MoBERT-F": 0.5378383457002383,
      "MoBERT-N": 0.5971881747480416,
      "MoBERT-min(F/N)": 0.5378383457002383,
      "MoBERT-max(F/N)": 0.5971881747480416,
      "MotionCritic": -13.980245590209961,
      "VeMo (human-opt view)": 0.798216276477146,
      "VeMo (max entropy view)": 0.798216276477146,
      "VeMo (min entropy view)": 0.8433734939759037,
      "VeMo (random view)": 0.8433734939759037,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person with their hand raised gets down on their knees."
  },
  "013736": {
    "text": "walking forward while waving.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0916769586198383,
      "Minus Multimodal Distance": -7.504536151885986,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.2138879254925996e-05,
      "MoBERT-F": 0.5282808646267022,
      "MoBERT-N": 0.5471853025991589,
      "MoBERT-min(F/N)": 0.5282808646267022,
      "MoBERT-max(F/N)": 0.5471853025991589,
      "MotionCritic": -12.187724113464355,
      "VeMo (human-opt view)": 0.5936675461741425,
      "VeMo (max entropy view)": 0.5936675461741425,
      "VeMo (min entropy view)": 0.29439252336448596,
      "VeMo (random view)": 0.29439252336448596,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking forward while waving."
  },
  "013745": {
    "text": "the person was holding the right side of his head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9347394302642335,
      "Minus Multimodal Distance": -4.496690273284912,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6421263100928627e-05,
      "MoBERT-F": 0.33437324213024633,
      "MoBERT-N": 0.4758345301121836,
      "MoBERT-min(F/N)": 0.33437324213024633,
      "MoBERT-max(F/N)": 0.4758345301121836,
      "MotionCritic": -2.238043785095215,
      "VeMo (human-opt view)": 5.293585357818345e-05,
      "VeMo (max entropy view)": 5.649215969526515e-05,
      "VeMo (min entropy view)": 5.293585357818345e-05,
      "VeMo (random view)": 5.293585357818345e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person was holding the right side of his head."
  },
  "013747": {
    "text": "a figure leans on its left leg, spine arched in an agressive posture slightly swaying its shoulders",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9701257261577982,
      "Minus Multimodal Distance": -7.404871940612793,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.608497925393749e-05,
      "MoBERT-F": 0.4962001009684378,
      "MoBERT-N": 0.5512976614922211,
      "MoBERT-min(F/N)": 0.4962001009684378,
      "MoBERT-max(F/N)": 0.5512976614922211,
      "MotionCritic": -7.0504631996154785,
      "VeMo (human-opt view)": 3.8919454420011675e-05,
      "VeMo (max entropy view)": 0.0013280212483399733,
      "VeMo (min entropy view)": 3.8919454420011675e-05,
      "VeMo (random view)": 0.0013280212483399733,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure leans on its left leg, with its spine arched in an aggressive posture, slightly swaying its shoulders."
  },
  "013757": {
    "text": "a person runs, hesitates, then runs again.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9457236625871478,
      "Minus Multimodal Distance": -8.968193054199219,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.510872556944378e-05,
      "MoBERT-F": 0.4228747757603798,
      "MoBERT-N": 0.5803289637875786,
      "MoBERT-min(F/N)": 0.4228747757603798,
      "MoBERT-max(F/N)": 0.5803289637875786,
      "MotionCritic": -11.301017761230469,
      "VeMo (human-opt view)": 0.8524590163934426,
      "VeMo (max entropy view)": 0.21177802944507362,
      "VeMo (min entropy view)": 0.8524590163934426,
      "VeMo (random view)": 0.8524590163934426,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs, hesitates, then runs again."
  },
  "013765": {
    "text": "a person moves around in a random fashion, trying to dodge something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.324534379595633,
      "Minus Multimodal Distance": -7.807673454284668,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8464903235435486,
      "MoBERT-F": 0.6326882434065743,
      "MoBERT-N": 0.36441579901375526,
      "MoBERT-min(F/N)": 0.36441579901375526,
      "MoBERT-max(F/N)": 0.6326882434065743,
      "MotionCritic": -9.491291046142578,
      "VeMo (human-opt view)": 0.7431906614785992,
      "VeMo (max entropy view)": 0.7431906614785992,
      "VeMo (min entropy view)": 0.7773972602739726,
      "VeMo (random view)": 0.7431906614785992,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves around randomly, trying to dodge something."
  },
  "013768": {
    "text": "moving the hands and work some thing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6225462513678316,
      "Minus Multimodal Distance": -14.28665542602539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0002595469413790852,
      "MoBERT-F": 0.5342313314926078,
      "MoBERT-N": 0.3802612549216762,
      "MoBERT-min(F/N)": 0.3802612549216762,
      "MoBERT-max(F/N)": 0.5342313314926078,
      "MotionCritic": -10.557777404785156,
      "VeMo (human-opt view)": 0.9705793145283591,
      "VeMo (max entropy view)": 0.9197461739455021,
      "VeMo (min entropy view)": 0.9705793145283591,
      "VeMo (random view)": 0.9197461739455021,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving the hands and working something."
  },
  "013777": {
    "text": "a man sits down with crossed legs then gets up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1836695360255465,
      "Minus Multimodal Distance": -12.851402282714844,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4137594664352946e-05,
      "MoBERT-F": 0.3283359028473314,
      "MoBERT-N": 0.5319602062122919,
      "MoBERT-min(F/N)": 0.3283359028473314,
      "MoBERT-max(F/N)": 0.5319602062122919,
      "MotionCritic": -6.687539577484131,
      "VeMo (human-opt view)": 0.7317073170731707,
      "VeMo (max entropy view)": 0.7317073170731707,
      "VeMo (min entropy view)": 0.02597187758478081,
      "VeMo (random view)": 0.7317073170731707,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man sits down with crossed legs and then gets up."
  },
  "013778": {
    "text": "a person sits down, turns to their right, then stands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2734612791292528,
      "Minus Multimodal Distance": -12.704955101013184,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.27752225328004e-05,
      "MoBERT-F": 0.383540518914403,
      "MoBERT-N": 0.4264500435310148,
      "MoBERT-min(F/N)": 0.383540518914403,
      "MoBERT-max(F/N)": 0.4264500435310148,
      "MotionCritic": -7.773561000823975,
      "VeMo (human-opt view)": 0.2015888778550149,
      "VeMo (max entropy view)": 0.2573099415204678,
      "VeMo (min entropy view)": 0.2015888778550149,
      "VeMo (random view)": 0.2015888778550149,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down, turns to their right, then stands."
  },
  "013811": {
    "text": "a person is sitting with both arms out in front of themselves holding something, then they move their right hand away and make sliding motions with it.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3468719612731639,
      "Minus Multimodal Distance": -8.629998207092285,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2567430278286338e-05,
      "MoBERT-F": 0.3311444749470358,
      "MoBERT-N": 0.3794202262086238,
      "MoBERT-min(F/N)": 0.3311444749470358,
      "MoBERT-max(F/N)": 0.3794202262086238,
      "MotionCritic": 2.8085336685180664,
      "VeMo (human-opt view)": 0.362962962962963,
      "VeMo (max entropy view)": 0.362962962962963,
      "VeMo (min entropy view)": 0.008578329307903722,
      "VeMo (random view)": 0.008578329307903722,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting with both arms extended in front of them, holding something. Then, they move their right hand away and make sliding motions with it."
  },
  "013833": {
    "text": "the person crouches and walks forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0415024127720074,
      "Minus Multimodal Distance": -11.816622734069824,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005509555339813232,
      "MoBERT-F": 0.5751975108102936,
      "MoBERT-N": 0.6296434718553129,
      "MoBERT-min(F/N)": 0.5751975108102936,
      "MoBERT-max(F/N)": 0.6296434718553129,
      "MotionCritic": -5.1759843826293945,
      "VeMo (human-opt view)": 0.9603751465416178,
      "VeMo (max entropy view)": 0.9048991354466859,
      "VeMo (min entropy view)": 0.9603751465416178,
      "VeMo (random view)": 0.9048991354466859,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person crouches and walks forward."
  },
  "013846": {
    "text": "a person using the right arm to reach for something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6479657520515619,
      "Minus Multimodal Distance": -7.993627071380615,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4809822207316756e-05,
      "MoBERT-F": 0.45122056713298997,
      "MoBERT-N": 0.5266015049369501,
      "MoBERT-min(F/N)": 0.45122056713298997,
      "MoBERT-max(F/N)": 0.5266015049369501,
      "MotionCritic": -9.993091583251953,
      "VeMo (human-opt view)": 0.899165061014772,
      "VeMo (max entropy view)": 0.899165061014772,
      "VeMo (min entropy view)": 0.9049773755656109,
      "VeMo (random view)": 0.899165061014772,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is using the right arm to reach for something."
  },
  "013855": {
    "text": "the toon has their both arms up & extending in a \"dancing\" position, while moving forwards then backwards.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2255944205690719,
      "Minus Multimodal Distance": -5.817939281463623,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.309826156008057e-05,
      "MoBERT-F": 0.4071000928514218,
      "MoBERT-N": 0.44481562458137003,
      "MoBERT-min(F/N)": 0.4071000928514218,
      "MoBERT-max(F/N)": 0.44481562458137003,
      "MotionCritic": -11.645255088806152,
      "VeMo (human-opt view)": 0.10112359550561797,
      "VeMo (max entropy view)": 0.28044280442804426,
      "VeMo (min entropy view)": 0.10112359550561797,
      "VeMo (random view)": 0.10112359550561797,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The toon has both their arms held up and extended in a \"dancing\" position, while moving forward then backward."
  },
  "013898": {
    "text": "a person walks forward and to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.327990145718108,
      "Minus Multimodal Distance": -2.6745598316192627,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5830891132354736,
      "MoBERT-F": 0.7561800149614695,
      "MoBERT-N": 0.7914564435182544,
      "MoBERT-min(F/N)": 0.7561800149614695,
      "MoBERT-max(F/N)": 0.7914564435182544,
      "MotionCritic": -11.803704261779785,
      "VeMo (human-opt view)": 0.9101034208432777,
      "VeMo (max entropy view)": 0.8673765730880929,
      "VeMo (min entropy view)": 0.9101034208432777,
      "VeMo (random view)": 0.9101034208432777,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and to the left."
  },
  "013952": {
    "text": "walking forward then slightly bending down at waist and touching a surface from right to left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2171146832557567,
      "Minus Multimodal Distance": -4.271550178527832,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8326796382316388e-05,
      "MoBERT-F": 0.47311453543104776,
      "MoBERT-N": 0.5052736012952113,
      "MoBERT-min(F/N)": 0.47311453543104776,
      "MoBERT-max(F/N)": 0.5052736012952113,
      "MotionCritic": -12.710258483886719,
      "VeMo (human-opt view)": 0.8936825885978429,
      "VeMo (max entropy view)": 0.8936825885978429,
      "VeMo (min entropy view)": 0.9097120546608102,
      "VeMo (random view)": 0.8936825885978429,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking forward, then slightly bending down at the waist and touching a surface from right to left."
  },
  "013969": {
    "text": "the person is getting ready to watch tv.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3553518757341128,
      "Minus Multimodal Distance": -10.669492721557617,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.172996730427258e-05,
      "MoBERT-F": 0.5144648328403356,
      "MoBERT-N": 0.635160531156665,
      "MoBERT-min(F/N)": 0.5144648328403356,
      "MoBERT-max(F/N)": 0.635160531156665,
      "MotionCritic": -11.380182266235352,
      "VeMo (human-opt view)": 8.750057904794958e-05,
      "VeMo (max entropy view)": 0.000754047460634287,
      "VeMo (min entropy view)": 8.750057904794958e-05,
      "VeMo (random view)": 0.000754047460634287,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is getting ready to watch TV."
  },
  "013974": {
    "text": "a person is standing and adjusts their weight to lean more on their left side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5682684579011038,
      "Minus Multimodal Distance": -5.015538692474365,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8858548830612563e-05,
      "MoBERT-F": 0.38309648183315326,
      "MoBERT-N": 0.44512632296516735,
      "MoBERT-min(F/N)": 0.38309648183315326,
      "MoBERT-max(F/N)": 0.44512632296516735,
      "MotionCritic": -12.269424438476562,
      "VeMo (human-opt view)": 0.8933143669985776,
      "VeMo (max entropy view)": 0.852157943067034,
      "VeMo (min entropy view)": 0.8933143669985776,
      "VeMo (random view)": 0.852157943067034,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing and adjusts their weight to lean more on their left side."
  },
  "013980": {
    "text": "a person slowly walked backward, and sat on the knee and hands",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.430367631828138,
      "Minus Multimodal Distance": -9.461644172668457,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.419622614979744e-05,
      "MoBERT-F": 0.38832100084975807,
      "MoBERT-N": 0.5345776773160129,
      "MoBERT-min(F/N)": 0.38832100084975807,
      "MoBERT-max(F/N)": 0.5345776773160129,
      "MotionCritic": -2.879729747772217,
      "VeMo (human-opt view)": 0.9497716894977168,
      "VeMo (max entropy view)": 0.9467084639498433,
      "VeMo (min entropy view)": 0.9497716894977168,
      "VeMo (random view)": 0.9497716894977168,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walked slowly backward and sat on one knee and their hands."
  },
  "013998": {
    "text": "a man raises his left hand to this face then lowers it back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.46272526701269057,
      "Minus Multimodal Distance": -5.743031978607178,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.414387902012095e-05,
      "MoBERT-F": 0.3077470851545492,
      "MoBERT-N": 0.4894927850132125,
      "MoBERT-min(F/N)": 0.3077470851545492,
      "MoBERT-max(F/N)": 0.4894927850132125,
      "MotionCritic": -8.80954360961914,
      "VeMo (human-opt view)": 0.9553437029603613,
      "VeMo (max entropy view)": 0.9553437029603613,
      "VeMo (min entropy view)": 0.9850422718404509,
      "VeMo (random view)": 0.9850422718404509,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man raises his left hand to his face then lowers it back down."
  },
  "014010": {
    "text": "a man walks forward with arms relatively stiff at sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8799493777798428,
      "Minus Multimodal Distance": -7.864882469177246,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.256439438497182e-05,
      "MoBERT-F": 0.37723580758604436,
      "MoBERT-N": 0.5116662293017895,
      "MoBERT-min(F/N)": 0.37723580758604436,
      "MoBERT-max(F/N)": 0.5116662293017895,
      "MotionCritic": -7.486119747161865,
      "VeMo (human-opt view)": 0.9890018098287624,
      "VeMo (max entropy view)": 0.9830085437266007,
      "VeMo (min entropy view)": 0.9890018098287624,
      "VeMo (random view)": 0.9830085437266007,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward with his arms relatively stiff at his sides."
  },
  "014020": {
    "text": "a person is standing up straight, with their arms stretched out. they then bring both hands toward their face simultainiously.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9908981543911191,
      "Minus Multimodal Distance": -11.661684036254883,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.183548248372972e-05,
      "MoBERT-F": 0.36871838129683077,
      "MoBERT-N": 0.45926332305190587,
      "MoBERT-min(F/N)": 0.36871838129683077,
      "MoBERT-max(F/N)": 0.45926332305190587,
      "MotionCritic": -12.586511611938477,
      "VeMo (human-opt view)": 0.7553072625698324,
      "VeMo (max entropy view)": 0.335243553008596,
      "VeMo (min entropy view)": 0.7553072625698324,
      "VeMo (random view)": 0.7553072625698324,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing up straight, with their arms stretched out. They then bring both hands toward their face simultaneously."
  },
  "014021": {
    "text": "a person's squats down using mainly their right leg, their left leg crosses their right leg, and then they stand back up",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6265734697976872,
      "Minus Multimodal Distance": -3.0741753578186035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8785490940208547e-05,
      "MoBERT-F": 0.4067323711634969,
      "MoBERT-N": 0.4558653362146478,
      "MoBERT-min(F/N)": 0.4067323711634969,
      "MoBERT-max(F/N)": 0.4558653362146478,
      "MotionCritic": -11.171027183532715,
      "VeMo (human-opt view)": 0.4696629213483146,
      "VeMo (max entropy view)": 0.4696629213483146,
      "VeMo (min entropy view)": 0.21180555555555555,
      "VeMo (random view)": 0.21180555555555555,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats down using mainly their right leg, with their left leg crossing their right leg, and then they stand back up."
  },
  "014043": {
    "text": "a person sits down on something and then stands back up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8853894471442708,
      "Minus Multimodal Distance": -9.698565483093262,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.125805233139545e-05,
      "MoBERT-F": 0.424623850905766,
      "MoBERT-N": 0.4992310411099922,
      "MoBERT-min(F/N)": 0.424623850905766,
      "MoBERT-max(F/N)": 0.4992310411099922,
      "MotionCritic": -5.388611316680908,
      "VeMo (human-opt view)": 0.9914172074523759,
      "VeMo (max entropy view)": 0.9850079744816587,
      "VeMo (min entropy view)": 0.9914172074523759,
      "VeMo (random view)": 0.9914172074523759,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits down on something and then stands back up."
  },
  "014049": {
    "text": "person stretches both arms up and then put arms down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8901595159682829,
      "Minus Multimodal Distance": -3.902266502380371,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00015142725897021592,
      "MoBERT-F": 0.4198765982375062,
      "MoBERT-N": 0.5367307123515241,
      "MoBERT-min(F/N)": 0.4198765982375062,
      "MoBERT-max(F/N)": 0.5367307123515241,
      "MotionCritic": -4.485828876495361,
      "VeMo (human-opt view)": 0.7774538386783285,
      "VeMo (max entropy view)": 0.7774538386783285,
      "VeMo (min entropy view)": 0.8074245939675174,
      "VeMo (random view)": 0.8074245939675174,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stretches both arms up and then puts the arms down."
  },
  "014108": {
    "text": "a person steps back and sits down, then stands back up again and walks forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0854882680681193,
      "Minus Multimodal Distance": -9.824387550354004,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0008648356306366622,
      "MoBERT-F": 0.5474963775664516,
      "MoBERT-N": 0.5386100115188486,
      "MoBERT-min(F/N)": 0.5386100115188486,
      "MoBERT-max(F/N)": 0.5474963775664516,
      "MotionCritic": -9.10860824584961,
      "VeMo (human-opt view)": 0.9867265672860935,
      "VeMo (max entropy view)": 0.9784537389100126,
      "VeMo (min entropy view)": 0.9867265672860935,
      "VeMo (random view)": 0.9867265672860935,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps back and sits down. Then, they stand back up again and walk forward."
  },
  "014169": {
    "text": "it looks like the person is taking care of his shoes",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4495017903743188,
      "Minus Multimodal Distance": -9.770682334899902,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0002410311863059178,
      "MoBERT-F": 0.5572974743350907,
      "MoBERT-N": 0.644952963615765,
      "MoBERT-min(F/N)": 0.5572974743350907,
      "MoBERT-max(F/N)": 0.644952963615765,
      "MotionCritic": -8.757451057434082,
      "VeMo (human-opt view)": 0.002976923249099089,
      "VeMo (max entropy view)": 0.002976923249099089,
      "VeMo (min entropy view)": 0.0020544697887583626,
      "VeMo (random view)": 0.0020544697887583626,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "It looks like the person is taking care of his shoes."
  },
  "014171": {
    "text": "a walking man bumps something with his right leg then turns and walks in another direction.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.036859996457039,
      "Minus Multimodal Distance": -1.9246503114700317,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.4179254472255707,
      "MoBERT-F": 0.6354111345707132,
      "MoBERT-N": 0.5641275868792246,
      "MoBERT-min(F/N)": 0.5641275868792246,
      "MoBERT-max(F/N)": 0.6354111345707132,
      "MotionCritic": -10.425115585327148,
      "VeMo (human-opt view)": 0.8936825885978429,
      "VeMo (max entropy view)": 0.8932038834951457,
      "VeMo (min entropy view)": 0.8936825885978429,
      "VeMo (random view)": 0.8932038834951457,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A walking man bumps into something with his right leg, then turns and walks in another direction."
  },
  "014185": {
    "text": "a person who is standing on a balance beam takes three steps forward and then steps down off the beam.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0183295351651132,
      "Minus Multimodal Distance": -5.99428653717041,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2491905838251114e-05,
      "MoBERT-F": 0.4077252309951548,
      "MoBERT-N": 0.5883819459920094,
      "MoBERT-min(F/N)": 0.4077252309951548,
      "MoBERT-max(F/N)": 0.5883819459920094,
      "MotionCritic": -9.411417007446289,
      "VeMo (human-opt view)": 0.007114962820307067,
      "VeMo (max entropy view)": 0.007114962820307067,
      "VeMo (min entropy view)": 0.004088606822481235,
      "VeMo (random view)": 0.004088606822481235,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who is standing on a balance beam takes three steps forward and then steps down off the beam."
  },
  "014192": {
    "text": "a person walks forward and then appears to bump into something, then continues walking forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0509575429579103,
      "Minus Multimodal Distance": -8.484786987304688,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.012282151728868484,
      "MoBERT-F": 0.6370163628832485,
      "MoBERT-N": 0.5876088855767567,
      "MoBERT-min(F/N)": 0.5876088855767567,
      "MoBERT-max(F/N)": 0.6370163628832485,
      "MotionCritic": -5.785974979400635,
      "VeMo (human-opt view)": 0.788,
      "VeMo (max entropy view)": 0.788,
      "VeMo (min entropy view)": 0.7982740021574973,
      "VeMo (random view)": 0.7982740021574973,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, then appears to bump into something, and then continues walking forward."
  },
  "014200": {
    "text": "a person sits on a stool of some sort and brushes off their legs.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.074300851986863,
      "Minus Multimodal Distance": -4.458399772644043,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.425823731755372e-05,
      "MoBERT-F": 0.44075609153181783,
      "MoBERT-N": 0.456090588393466,
      "MoBERT-min(F/N)": 0.44075609153181783,
      "MoBERT-max(F/N)": 0.456090588393466,
      "MotionCritic": -12.163399696350098,
      "VeMo (human-opt view)": 0.004080503492634346,
      "VeMo (max entropy view)": 0.01405738494126709,
      "VeMo (min entropy view)": 0.004080503492634346,
      "VeMo (random view)": 0.01405738494126709,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits on some sort of stool and brushes their legs."
  },
  "014215": {
    "text": "a person stands still and does not move.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2355355476670353,
      "Minus Multimodal Distance": -4.277457237243652,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.619580820668489e-05,
      "MoBERT-F": 0.3463707350364312,
      "MoBERT-N": 0.4404138614908763,
      "MoBERT-min(F/N)": 0.3463707350364312,
      "MoBERT-max(F/N)": 0.4404138614908763,
      "MotionCritic": -2.251117467880249,
      "VeMo (human-opt view)": 6.3224675312152405e-06,
      "VeMo (max entropy view)": 6.3224675312152405e-06,
      "VeMo (min entropy view)": 2.191175473339599e-06,
      "VeMo (random view)": 6.3224675312152405e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands still and does not move."
  },
  "014222": {
    "text": "a blind folded person walks around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.685072433941345,
      "Minus Multimodal Distance": -7.957069396972656,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0047004795633256435,
      "MoBERT-F": 0.6166853580795363,
      "MoBERT-N": 0.5824060659601823,
      "MoBERT-min(F/N)": 0.5824060659601823,
      "MoBERT-max(F/N)": 0.6166853580795363,
      "MotionCritic": -12.92706298828125,
      "VeMo (human-opt view)": 0.7188755020080321,
      "VeMo (max entropy view)": 0.6666666666666666,
      "VeMo (min entropy view)": 0.7188755020080321,
      "VeMo (random view)": 0.7188755020080321,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A blindfolded person walks around."
  },
  "014232": {
    "text": "a person waalks forward and appears to pour two items into a bowl.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.310043333126053,
      "Minus Multimodal Distance": -10.361038208007812,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.24294414895121e-05,
      "MoBERT-F": 0.4346224806580032,
      "MoBERT-N": 0.46904777583962187,
      "MoBERT-min(F/N)": 0.4346224806580032,
      "MoBERT-max(F/N)": 0.46904777583962187,
      "MotionCritic": -14.77437973022461,
      "VeMo (human-opt view)": 9.320003992935721e-05,
      "VeMo (max entropy view)": 0.0001486699235550939,
      "VeMo (min entropy view)": 9.320003992935721e-05,
      "VeMo (random view)": 0.0001486699235550939,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and appears to pour two items into a bowl."
  },
  "014244": {
    "text": "someone scrolls from right to left and then stands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2857490740484638,
      "Minus Multimodal Distance": -10.094782829284668,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.015574995428323746,
      "MoBERT-F": 0.5696661993377433,
      "MoBERT-N": 0.6714668516279064,
      "MoBERT-min(F/N)": 0.5696661993377433,
      "MoBERT-max(F/N)": 0.6714668516279064,
      "MotionCritic": -15.2517728805542,
      "VeMo (human-opt view)": 0.059946316731285414,
      "VeMo (max entropy view)": 0.059946316731285414,
      "VeMo (min entropy view)": 0.05656988925471416,
      "VeMo (random view)": 0.05656988925471416,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone scrolls from right to left and then stands."
  },
  "014279": {
    "text": "stay stand on the place and run.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5819555727734536,
      "Minus Multimodal Distance": -4.884631156921387,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.105564974248409e-05,
      "MoBERT-F": 0.4303673956243321,
      "MoBERT-N": 0.6450561715451196,
      "MoBERT-min(F/N)": 0.4303673956243321,
      "MoBERT-max(F/N)": 0.6450561715451196,
      "MotionCritic": -4.459060192108154,
      "VeMo (human-opt view)": 0.7055630936227951,
      "VeMo (max entropy view)": 0.7055630936227951,
      "VeMo (min entropy view)": 0.18287243532560213,
      "VeMo (random view)": 0.7055630936227951,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stays standing on the place and runs."
  },
  "014285": {
    "text": "the person was walking so he could sneak up on someone.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6095543794378944,
      "Minus Multimodal Distance": -5.717724800109863,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.134120911359787,
      "MoBERT-F": 0.4630882440978292,
      "MoBERT-N": 0.35729721083458343,
      "MoBERT-min(F/N)": 0.35729721083458343,
      "MoBERT-max(F/N)": 0.4630882440978292,
      "MotionCritic": -13.900548934936523,
      "VeMo (human-opt view)": 0.6071428571428571,
      "VeMo (max entropy view)": 0.48519362186788156,
      "VeMo (min entropy view)": 0.6071428571428571,
      "VeMo (random view)": 0.6071428571428571,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was walking quietly so that he could sneak up on someone."
  },
  "014295": {
    "text": "person down on hands and knees stands up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6843907574315464,
      "Minus Multimodal Distance": -7.870302677154541,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3227734345709905e-05,
      "MoBERT-F": 0.3573393255684512,
      "MoBERT-N": 0.4587403590840786,
      "MoBERT-min(F/N)": 0.3573393255684512,
      "MoBERT-max(F/N)": 0.4587403590840786,
      "MotionCritic": -8.180461883544922,
      "VeMo (human-opt view)": 0.9996198250214001,
      "VeMo (max entropy view)": 0.9989656203581601,
      "VeMo (min entropy view)": 0.9996198250214001,
      "VeMo (random view)": 0.9996198250214001,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who is down on their hands and knees stands up."
  },
  "014310": {
    "text": "a man is walking forward stumbles and goes to the ground and get back up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4893665983752795,
      "Minus Multimodal Distance": -9.623376846313477,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0009397566900588572,
      "MoBERT-F": 0.5540416064346648,
      "MoBERT-N": 0.49835623802709916,
      "MoBERT-min(F/N)": 0.49835623802709916,
      "MoBERT-max(F/N)": 0.5540416064346648,
      "MotionCritic": -7.650808811187744,
      "VeMo (human-opt view)": 0.014103792961796521,
      "VeMo (max entropy view)": 0.017964071856287425,
      "VeMo (min entropy view)": 0.014103792961796521,
      "VeMo (random view)": 0.014103792961796521,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man is walking forward, stumbles, goes to the ground and gets back up."
  },
  "014326": {
    "text": "person lunges forward with left foot first repeatedly",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8546044124892713,
      "Minus Multimodal Distance": -6.550167560577393,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.561239125498105e-05,
      "MoBERT-F": 0.4398752909694637,
      "MoBERT-N": 0.5217329287284491,
      "MoBERT-min(F/N)": 0.4398752909694637,
      "MoBERT-max(F/N)": 0.5217329287284491,
      "MotionCritic": -4.319085597991943,
      "VeMo (human-opt view)": 0.880648899188876,
      "VeMo (max entropy view)": 0.880648899188876,
      "VeMo (min entropy view)": 0.8806539509536785,
      "VeMo (random view)": 0.8806539509536785,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lunges forward with left foot first repeatedly"
  },
  "014336": {
    "text": "a person raises their hands and brings them closer to their face then lowers them",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.47221450783823277,
      "Minus Multimodal Distance": -3.789318799972534,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9100918254698627e-05,
      "MoBERT-F": 0.2938165974729367,
      "MoBERT-N": 0.3900073380861793,
      "MoBERT-min(F/N)": 0.2938165974729367,
      "MoBERT-max(F/N)": 0.3900073380861793,
      "MotionCritic": -7.867407321929932,
      "VeMo (human-opt view)": 0.9991452302233691,
      "VeMo (max entropy view)": 0.9964030708111881,
      "VeMo (min entropy view)": 0.9991452302233691,
      "VeMo (random view)": 0.9991452302233691,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises their hands, brings them closer to their face, then lowers them."
  },
  "014342": {
    "text": "stick man does fighting movements kicks in the air punches the air and stops",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.058193749847156,
      "Minus Multimodal Distance": -11.44446086883545,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9910325407981873,
      "MoBERT-F": 0.7661079304213192,
      "MoBERT-N": 0.6267439098446477,
      "MoBERT-min(F/N)": 0.6267439098446477,
      "MoBERT-max(F/N)": 0.7661079304213192,
      "MotionCritic": -3.9446234703063965,
      "VeMo (human-opt view)": 0.992431235000923,
      "VeMo (max entropy view)": 0.9914415491274206,
      "VeMo (min entropy view)": 0.992431235000923,
      "VeMo (random view)": 0.992431235000923,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man does fighting movements, kicks in the air, punches the air and stops."
  },
  "014347": {
    "text": "a person is sitting and puts their left hand to their head, and their right foot up on their left knee.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0256185139694076,
      "Minus Multimodal Distance": -8.414734840393066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.547405347286258e-05,
      "MoBERT-F": 0.30008687236504905,
      "MoBERT-N": 0.3487846330879416,
      "MoBERT-min(F/N)": 0.30008687236504905,
      "MoBERT-max(F/N)": 0.3487846330879416,
      "MotionCritic": -8.432831764221191,
      "VeMo (human-opt view)": 0.03095558546433378,
      "VeMo (max entropy view)": 0.5780346820809249,
      "VeMo (min entropy view)": 0.03095558546433378,
      "VeMo (random view)": 0.5780346820809249,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting, putting their left hand to their head and their right foot up on their left knee."
  },
  "014351": {
    "text": "the person bent down and dodge something towards the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0129569874007829,
      "Minus Multimodal Distance": -7.856818675994873,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2465326765086502e-05,
      "MoBERT-F": 0.3436183685386912,
      "MoBERT-N": 0.44383410721028654,
      "MoBERT-min(F/N)": 0.3436183685386912,
      "MoBERT-max(F/N)": 0.44383410721028654,
      "MotionCritic": -5.236281394958496,
      "VeMo (human-opt view)": 0.001032520846239844,
      "VeMo (max entropy view)": 0.4527027027027027,
      "VeMo (min entropy view)": 0.001032520846239844,
      "VeMo (random view)": 0.001032520846239844,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person bent down and dodged something towards the left."
  },
  "014352": {
    "text": "a person raises their arms and throws them back down in frustration",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6935589349527441,
      "Minus Multimodal Distance": -10.956188201904297,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.005276346113532782,
      "MoBERT-F": 0.558328512354511,
      "MoBERT-N": 0.4780610958798218,
      "MoBERT-min(F/N)": 0.4780610958798218,
      "MoBERT-max(F/N)": 0.558328512354511,
      "MotionCritic": -0.8540781140327454,
      "VeMo (human-opt view)": 0.5478547854785478,
      "VeMo (max entropy view)": 0.5478547854785478,
      "VeMo (min entropy view)": 0.9326705829191143,
      "VeMo (random view)": 0.5478547854785478,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person raises their arms and then throws them back down in frustration."
  },
  "014361": {
    "text": "the person is getting ready to place the football for a kick off.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1066722065896926,
      "Minus Multimodal Distance": -9.428236961364746,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0001578752271598205,
      "MoBERT-F": 0.5815564180219372,
      "MoBERT-N": 0.6293505312792798,
      "MoBERT-min(F/N)": 0.5815564180219372,
      "MoBERT-max(F/N)": 0.6293505312792798,
      "MotionCritic": -1.9838563203811646,
      "VeMo (human-opt view)": 0.0005195132318445868,
      "VeMo (max entropy view)": 0.0010954730878778077,
      "VeMo (min entropy view)": 0.0005195132318445868,
      "VeMo (random view)": 0.0005195132318445868,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is getting ready to place the football for a kick - off."
  },
  "014384": {
    "text": "a person balances on one foot while moving their other, and then switches.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0872621538346519,
      "Minus Multimodal Distance": -10.865046501159668,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0007999269873835146,
      "MoBERT-F": 0.6221212029176231,
      "MoBERT-N": 0.6698683820430605,
      "MoBERT-min(F/N)": 0.6221212029176231,
      "MoBERT-max(F/N)": 0.6698683820430605,
      "MotionCritic": -20.32284164428711,
      "VeMo (human-opt view)": 0.9770992366412213,
      "VeMo (max entropy view)": 0.974020938348197,
      "VeMo (min entropy view)": 0.9770992366412213,
      "VeMo (random view)": 0.9770992366412213,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person balances on one foot while moving the other, and then switches."
  },
  "014387": {
    "text": "a person walks forward  to bow then place hands on shoulders of other person and bows again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9309832919481437,
      "Minus Multimodal Distance": -1.7547239065170288,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0004116383206564933,
      "MoBERT-F": 0.5652082974390791,
      "MoBERT-N": 0.48423148349285355,
      "MoBERT-min(F/N)": 0.48423148349285355,
      "MoBERT-max(F/N)": 0.5652082974390791,
      "MotionCritic": -6.739962577819824,
      "VeMo (human-opt view)": 0.024460778407036993,
      "VeMo (max entropy view)": 0.024460778407036993,
      "VeMo (min entropy view)": 0.017952060976832815,
      "VeMo (random view)": 0.024460778407036993,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward to bow, then places hands on the shoulders of the other person and bows again."
  },
  "014389": {
    "text": "a person picks something up, rubs it and puts it back down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0279358433334334,
      "Minus Multimodal Distance": -10.053166389465332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005580832366831601,
      "MoBERT-F": 0.4318135273305663,
      "MoBERT-N": 0.5205352292849541,
      "MoBERT-min(F/N)": 0.4318135273305663,
      "MoBERT-max(F/N)": 0.5205352292849541,
      "MotionCritic": -0.42969492077827454,
      "VeMo (human-opt view)": 0.6926070038910506,
      "VeMo (max entropy view)": 0.6926070038910506,
      "VeMo (min entropy view)": 0.1645274212368728,
      "VeMo (random view)": 0.1645274212368728,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person picks something up, rubs it, and puts it back down."
  },
  "014392": {
    "text": "a person is making signals with both his hands",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5979149618932659,
      "Minus Multimodal Distance": -7.11613130569458,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.274564369348809e-05,
      "MoBERT-F": 0.4766164686389082,
      "MoBERT-N": 0.577173967305376,
      "MoBERT-min(F/N)": 0.4766164686389082,
      "MoBERT-max(F/N)": 0.577173967305376,
      "MotionCritic": -5.900538921356201,
      "VeMo (human-opt view)": 0.9896122432796765,
      "VeMo (max entropy view)": 0.8739837398373984,
      "VeMo (min entropy view)": 0.9896122432796765,
      "VeMo (random view)": 0.8739837398373984,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is making signals with both his hands."
  },
  "014420": {
    "text": "someone nervously pacing around in a circle",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.616570557860961,
      "Minus Multimodal Distance": -3.044581890106201,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.280735811335035e-05,
      "MoBERT-F": 0.5263407387537896,
      "MoBERT-N": 0.6101643306604139,
      "MoBERT-min(F/N)": 0.5263407387537896,
      "MoBERT-max(F/N)": 0.6101643306604139,
      "MotionCritic": -10.127477645874023,
      "VeMo (human-opt view)": 0.6511627906976745,
      "VeMo (max entropy view)": 0.6511627906976745,
      "VeMo (min entropy view)": 0.6520874751491054,
      "VeMo (random view)": 0.6511627906976745,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is nervously pacing around in a circle."
  },
  "014428": {
    "text": "person is kicking an athletic ball",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.9492356804847313,
      "Minus Multimodal Distance": -9.791699409484863,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.060829476453364e-05,
      "MoBERT-F": 0.4996082795561779,
      "MoBERT-N": 0.6245149473840783,
      "MoBERT-min(F/N)": 0.4996082795561779,
      "MoBERT-max(F/N)": 0.6245149473840783,
      "MotionCritic": -8.385194778442383,
      "VeMo (human-opt view)": 0.004631229695168314,
      "VeMo (max entropy view)": 0.029379760609358,
      "VeMo (min entropy view)": 0.004631229695168314,
      "VeMo (random view)": 0.029379760609358,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is kicking an athletic ball."
  },
  "014436": {
    "text": "a stick figure jumps side ways.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.183021270065777,
      "Minus Multimodal Distance": -8.072710990905762,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5989736318588257,
      "MoBERT-F": 0.6999656279122959,
      "MoBERT-N": 0.6598418563311685,
      "MoBERT-min(F/N)": 0.6598418563311685,
      "MoBERT-max(F/N)": 0.6999656279122959,
      "MotionCritic": -7.797693729400635,
      "VeMo (human-opt view)": 0.3347457627118644,
      "VeMo (max entropy view)": 0.3347457627118644,
      "VeMo (min entropy view)": 0.28150134048257375,
      "VeMo (random view)": 0.28150134048257375,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure jumps sideways."
  },
  "014444": {
    "text": "figure appears to be walking back and forth",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.8768885152633972,
      "Minus Multimodal Distance": -8.711560249328613,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7739584445953369,
      "MoBERT-F": 0.6641178975713526,
      "MoBERT-N": 0.6052510981127501,
      "MoBERT-min(F/N)": 0.6052510981127501,
      "MoBERT-max(F/N)": 0.6641178975713526,
      "MotionCritic": -6.478786945343018,
      "VeMo (human-opt view)": 0.997193717277487,
      "VeMo (max entropy view)": 0.997193717277487,
      "VeMo (min entropy view)": 0.9973668654951905,
      "VeMo (random view)": 0.997193717277487,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure appears to be walking back and forth."
  },
  "014445": {
    "text": "a person raises both arms to his face, then drops them back to his sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5370771780755432,
      "Minus Multimodal Distance": -6.108357906341553,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.538765668054111e-05,
      "MoBERT-F": 0.2712786776393126,
      "MoBERT-N": 0.4106419071029394,
      "MoBERT-min(F/N)": 0.2712786776393126,
      "MoBERT-max(F/N)": 0.4106419071029394,
      "MotionCritic": -6.8271942138671875,
      "VeMo (human-opt view)": 0.9997974209360481,
      "VeMo (max entropy view)": 0.9890675977333867,
      "VeMo (min entropy view)": 0.9997974209360481,
      "VeMo (random view)": 0.9997974209360481,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises both arms to their face, then drops them back to their sides."
  },
  "014448": {
    "text": "a person walks forward, hops backwards, then defends themselves by putting their hands up in defense",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.2252934532907696,
      "Minus Multimodal Distance": -11.43266773223877,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.004955713637173176,
      "MoBERT-F": 0.48689806710980477,
      "MoBERT-N": 0.5129997810966972,
      "MoBERT-min(F/N)": 0.48689806710980477,
      "MoBERT-max(F/N)": 0.5129997810966972,
      "MotionCritic": -12.27597427368164,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.5925925925925926,
      "VeMo (random view)": 0.5925925925925926,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, hops backwards, then defends themselves by putting their hands up in a defensive position."
  },
  "014472": {
    "text": "a person dances briefly and then performs a backflip.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -2.1969568389059173,
      "Minus Multimodal Distance": -9.046456336975098,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9924333095550537,
      "MoBERT-F": 0.7654009986485397,
      "MoBERT-N": 0.7588286725242318,
      "MoBERT-min(F/N)": 0.7588286725242318,
      "MoBERT-max(F/N)": 0.7654009986485397,
      "MotionCritic": -3.609668731689453,
      "VeMo (human-opt view)": 0.07183811129848229,
      "VeMo (max entropy view)": 0.4842105263157895,
      "VeMo (min entropy view)": 0.07183811129848229,
      "VeMo (random view)": 0.4842105263157895,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person briefly dances and then performs a backflip."
  },
  "014499": {
    "text": "a person brings his arms which were in the air along his body. his knees appear to be bent.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8078327737505829,
      "Minus Multimodal Distance": -6.686741828918457,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4105320335365832e-05,
      "MoBERT-F": 0.3625673273104092,
      "MoBERT-N": 0.47248341418306194,
      "MoBERT-min(F/N)": 0.3625673273104092,
      "MoBERT-max(F/N)": 0.47248341418306194,
      "MotionCritic": -3.7029190063476562,
      "VeMo (human-opt view)": 0.04743833017077799,
      "VeMo (max entropy view)": 0.39278131634819535,
      "VeMo (min entropy view)": 0.04743833017077799,
      "VeMo (random view)": 0.39278131634819535,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person brings his arms, which were in the air, down along his body. His knees appear to be bent."
  },
  "014506": {
    "text": "figure walks forward, raises foot to step up and over, uses other foot to drop down, walks forward and stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2852348809719654,
      "Minus Multimodal Distance": -9.154390335083008,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.02193308062851429,
      "MoBERT-F": 0.523070699296859,
      "MoBERT-N": 0.5035406524260089,
      "MoBERT-min(F/N)": 0.5035406524260089,
      "MoBERT-max(F/N)": 0.523070699296859,
      "MotionCritic": -7.372415542602539,
      "VeMo (human-opt view)": 0.9362999085644621,
      "VeMo (max entropy view)": 0.9362928797924474,
      "VeMo (min entropy view)": 0.9362999085644621,
      "VeMo (random view)": 0.9362999085644621,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The figure walks forward, raises a foot to step up and over, uses the other foot to drop down, walks forward, and then stops."
  },
  "014512": {
    "text": "a person walks backwards, then turns around then walks backwards again.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6853698645813666,
      "Minus Multimodal Distance": -9.246420860290527,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0006764198769815266,
      "MoBERT-F": 0.5193510043481935,
      "MoBERT-N": 0.5389008114591816,
      "MoBERT-min(F/N)": 0.5193510043481935,
      "MoBERT-max(F/N)": 0.5389008114591816,
      "MotionCritic": -9.754096984863281,
      "VeMo (human-opt view)": 0.8931572629051621,
      "VeMo (max entropy view)": 0.8931572629051621,
      "VeMo (min entropy view)": 0.9152086137281292,
      "VeMo (random view)": 0.9152086137281292,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks backwards, then turns around, then walks backwards again."
  },
  "014535": {
    "text": "a person appears to have severe arm pain holding and slouching their right shoulder.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7978648917921787,
      "Minus Multimodal Distance": -2.7421913146972656,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.34587273048237e-05,
      "MoBERT-F": 0.5179869563895527,
      "MoBERT-N": 0.572691839079053,
      "MoBERT-min(F/N)": 0.5179869563895527,
      "MoBERT-max(F/N)": 0.572691839079053,
      "MotionCritic": -4.659839153289795,
      "VeMo (human-opt view)": 0.7876182287188306,
      "VeMo (max entropy view)": 0.5920245398773006,
      "VeMo (min entropy view)": 0.7876182287188306,
      "VeMo (random view)": 0.7876182287188306,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person appears to have severe arm pain, holding and slouching their right shoulder."
  },
  "014554": {
    "text": "a person who seems to slap something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6500306299846893,
      "Minus Multimodal Distance": -6.56302547454834,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00041080662049353123,
      "MoBERT-F": 0.49789639687532683,
      "MoBERT-N": 0.48211185812136326,
      "MoBERT-min(F/N)": 0.48211185812136326,
      "MoBERT-max(F/N)": 0.49789639687532683,
      "MotionCritic": -3.476983070373535,
      "VeMo (human-opt view)": 0.7547826086956522,
      "VeMo (max entropy view)": 0.7547826086956522,
      "VeMo (min entropy view)": 0.8517241379310345,
      "VeMo (random view)": 0.7547826086956522,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who seems to slap something."
  },
  "014565": {
    "text": "person walks forward, turns and walks back",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5512154865088033,
      "Minus Multimodal Distance": -11.045546531677246,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.02987249195575714,
      "MoBERT-F": 0.5694423377068702,
      "MoBERT-N": 0.6609108053248682,
      "MoBERT-min(F/N)": 0.5694423377068702,
      "MoBERT-max(F/N)": 0.6609108053248682,
      "MotionCritic": -14.927978515625,
      "VeMo (human-opt view)": 0.9770104497955475,
      "VeMo (max entropy view)": 0.9604365620736699,
      "VeMo (min entropy view)": 0.9770104497955475,
      "VeMo (random view)": 0.9770104497955475,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, turns around and walks back."
  },
  "014603": {
    "text": "a man raises his hands up on the air,starts clapping and then lowers them down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.748847029288,
      "Minus Multimodal Distance": -9.28127384185791,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5885270588332787e-05,
      "MoBERT-F": 0.3231034622306237,
      "MoBERT-N": 0.4696912599811079,
      "MoBERT-min(F/N)": 0.3231034622306237,
      "MoBERT-max(F/N)": 0.4696912599811079,
      "MotionCritic": -8.15604305267334,
      "VeMo (human-opt view)": 0.9914040114613181,
      "VeMo (max entropy view)": 0.6652542372881356,
      "VeMo (min entropy view)": 0.9914040114613181,
      "VeMo (random view)": 0.9914040114613181,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man raises his hands up in the air, starts clapping, and then lowers them down."
  },
  "014604": {
    "text": "a man paces back and forth along the same line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.916619485230853,
      "Minus Multimodal Distance": -10.510306358337402,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9373013377189636,
      "MoBERT-F": 0.6750386488493079,
      "MoBERT-N": 0.6571287659381713,
      "MoBERT-min(F/N)": 0.6571287659381713,
      "MoBERT-max(F/N)": 0.6750386488493079,
      "MotionCritic": -8.862236022949219,
      "VeMo (human-opt view)": 0.9647812166488794,
      "VeMo (max entropy view)": 0.957933868127568,
      "VeMo (min entropy view)": 0.9647812166488794,
      "VeMo (random view)": 0.9647812166488794,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man paces back and forth along the same line."
  },
  "014612": {
    "text": "a person rolls their arms and shoulders.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8140708776809438,
      "Minus Multimodal Distance": -9.798027992248535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.409744047326967e-05,
      "MoBERT-F": 0.40167850562885093,
      "MoBERT-N": 0.4800823667429372,
      "MoBERT-min(F/N)": 0.40167850562885093,
      "MoBERT-max(F/N)": 0.4800823667429372,
      "MotionCritic": -8.856508255004883,
      "VeMo (human-opt view)": 0.9850585058505851,
      "VeMo (max entropy view)": 0.9850585058505851,
      "VeMo (min entropy view)": 0.9883094992115709,
      "VeMo (random view)": 0.9883094992115709,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person rolls their arms and shoulders."
  }
}