{
  "007281": {
    "text": "a person bends slightly at the shoulders and mimics the movements of picking something up with both arms moving it from their left side to their right side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.12343406236881949,
      "Minus Multimodal Distance": -6.621583938598633,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.3633466955507174e-05,
      "MoBERT-F": 0.2685138153633721,
      "MoBERT-N": 0.3697352392754945,
      "MoBERT-min(F/N)": 0.2685138153633721,
      "MoBERT-max(F/N)": 0.3697352392754945,
      "MotionCritic": -5.5784010887146,
      "VeMo (human-opt view)": 0.8669354838709677,
      "VeMo (max entropy view)": 0.26878612716763006,
      "VeMo (min entropy view)": 0.8669354838709677,
      "VeMo (random view)": 0.8669354838709677,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person bends slightly at the shoulders and mimics the movements of picking something up, moving it from their left side to their right side with both arms."
  },
  "011797": {
    "text": "a person is attempting to jump rope by hopping from one leg to the other as if running in place, but has to reset every two to three jumps.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6880343533031194,
      "Minus Multimodal Distance": -4.606210708618164,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9974547028541565,
      "MoBERT-F": 0.8006058558852767,
      "MoBERT-N": 0.8351623028058379,
      "MoBERT-min(F/N)": 0.8006058558852767,
      "MoBERT-max(F/N)": 0.8351623028058379,
      "MotionCritic": -6.345391273498535,
      "VeMo (human-opt view)": 0.532,
      "VeMo (max entropy view)": 0.532,
      "VeMo (min entropy view)": 0.5466237942122186,
      "VeMo (random view)": 0.5466237942122186,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is attempting to jump rope by hopping from one leg to the other, as if running in place, but has to reset every two to three jumps."
  },
  "002388": {
    "text": "someone carefully looks behind them while backing up, then uses both hands to dip into a slight sitting motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.539512675833782,
      "Minus Multimodal Distance": -5.178614139556885,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.898938775062561,
      "MoBERT-F": 0.6269470799791683,
      "MoBERT-N": 0.521482255046976,
      "MoBERT-min(F/N)": 0.521482255046976,
      "MoBERT-max(F/N)": 0.6269470799791683,
      "MotionCritic": -8.199857711791992,
      "VeMo (human-opt view)": 0.4831081081081081,
      "VeMo (max entropy view)": 0.4831081081081081,
      "VeMo (min entropy view)": 0.743142144638404,
      "VeMo (random view)": 0.743142144638404,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone carefully looks behind themselves while backing up, then uses both hands to dip into a slight sitting motion."
  },
  "006819": {
    "text": "person brings left forearm up to their eyesight like they are checking the time on a watch.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3432270180799429,
      "Minus Multimodal Distance": -10.997893333435059,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.4337375331670046e-05,
      "MoBERT-F": 0.3087952599228997,
      "MoBERT-N": 0.38688420580331506,
      "MoBERT-min(F/N)": 0.3087952599228997,
      "MoBERT-max(F/N)": 0.38688420580331506,
      "MotionCritic": -7.18307638168335,
      "VeMo (human-opt view)": 0.6501128668171557,
      "VeMo (max entropy view)": 0.6501128668171557,
      "VeMo (min entropy view)": 0.731006160164271,
      "VeMo (random view)": 0.6501128668171557,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person brings their left forearm up to eye level as if they are checking the time on a watch."
  },
  "011075": {
    "text": "a man bends over and puts his hands on the ground and is on all fours",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.800409842329173,
      "Minus Multimodal Distance": -10.682537078857422,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9958833456039429,
      "MoBERT-F": 0.6976095581818702,
      "MoBERT-N": 0.5560607081795906,
      "MoBERT-min(F/N)": 0.5560607081795906,
      "MoBERT-max(F/N)": 0.6976095581818702,
      "MotionCritic": -5.9899468421936035,
      "VeMo (human-opt view)": 0.9984045382024463,
      "VeMo (max entropy view)": 0.9959485927396635,
      "VeMo (min entropy view)": 0.9984045382024463,
      "VeMo (random view)": 0.9984045382024463,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man bends over, puts his hands on the ground, and is on all fours."
  },
  "004297": {
    "text": "a man steps back and shields his face with both of his hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6207468884917037,
      "Minus Multimodal Distance": -11.0394926071167,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5588035583496094,
      "MoBERT-F": 0.6383831682589938,
      "MoBERT-N": 0.655382463579155,
      "MoBERT-min(F/N)": 0.6383831682589938,
      "MoBERT-max(F/N)": 0.655382463579155,
      "MotionCritic": -8.588074684143066,
      "VeMo (human-opt view)": 0.9197080291970803,
      "VeMo (max entropy view)": 0.788135593220339,
      "VeMo (min entropy view)": 0.9197080291970803,
      "VeMo (random view)": 0.9197080291970803,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man steps back and shields his face with both of his hands."
  },
  "012400": {
    "text": "someone sees something on the ground the they move slowly away from",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6367696049769533,
      "Minus Multimodal Distance": -8.154894828796387,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.006609829608350992,
      "MoBERT-F": 0.4282776399942769,
      "MoBERT-N": 0.35222969804966875,
      "MoBERT-min(F/N)": 0.35222969804966875,
      "MoBERT-max(F/N)": 0.4282776399942769,
      "MotionCritic": -6.301889896392822,
      "VeMo (human-opt view)": 0.8265560165975103,
      "VeMo (max entropy view)": 0.7547826086956522,
      "VeMo (min entropy view)": 0.8265560165975103,
      "VeMo (random view)": 0.7547826086956522,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone sees something on the ground, then they move slowly away from it."
  },
  "014604": {
    "text": "a man paces back and forth along the same line.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8937956488401687,
      "Minus Multimodal Distance": -6.245355129241943,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.05472901090979576,
      "MoBERT-F": 0.607824144850446,
      "MoBERT-N": 0.7375160023201723,
      "MoBERT-min(F/N)": 0.607824144850446,
      "MoBERT-max(F/N)": 0.7375160023201723,
      "MotionCritic": -16.449420928955078,
      "VeMo (human-opt view)": 0.8989784335981839,
      "VeMo (max entropy view)": 0.8989784335981839,
      "VeMo (min entropy view)": 0.943467676494455,
      "VeMo (random view)": 0.943467676494455,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man paces back and forth along the same line."
  },
  "009673": {
    "text": "a person throws their right hand up",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7431718065005684,
      "Minus Multimodal Distance": -13.416168212890625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.008532857522368431,
      "MoBERT-F": 0.5256938681081281,
      "MoBERT-N": 0.5871265095463339,
      "MoBERT-min(F/N)": 0.5256938681081281,
      "MoBERT-max(F/N)": 0.5871265095463339,
      "MotionCritic": -8.321547508239746,
      "VeMo (human-opt view)": 0.9850422718404509,
      "VeMo (max entropy view)": 0.9808773903262092,
      "VeMo (min entropy view)": 0.9850422718404509,
      "VeMo (random view)": 0.9850422718404509,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws their right hand up."
  },
  "003282": {
    "text": "the person is standing up being flexable",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6590096607631845,
      "Minus Multimodal Distance": -4.150448799133301,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4038130504777655e-05,
      "MoBERT-F": 0.4175997354513657,
      "MoBERT-N": 0.46928190409153075,
      "MoBERT-min(F/N)": 0.4175997354513657,
      "MoBERT-max(F/N)": 0.46928190409153075,
      "MotionCritic": -2.826742172241211,
      "VeMo (human-opt view)": 0.967062818336163,
      "VeMo (max entropy view)": 0.9627182633317602,
      "VeMo (min entropy view)": 0.967062818336163,
      "VeMo (random view)": 0.967062818336163,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing up and being flexible."
  },
  "001664": {
    "text": "losing balance, moving backwards with both feet.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.37098213683510095,
      "Minus Multimodal Distance": -10.615768432617188,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.17650188505649567,
      "MoBERT-F": 0.5629719725775613,
      "MoBERT-N": 0.4792519720942582,
      "MoBERT-min(F/N)": 0.4792519720942582,
      "MoBERT-max(F/N)": 0.5629719725775613,
      "MotionCritic": -3.5659983158111572,
      "VeMo (human-opt view)": 0.42245989304812837,
      "VeMo (max entropy view)": 0.42245989304812837,
      "VeMo (min entropy view)": 0.6652267818574514,
      "VeMo (random view)": 0.6652267818574514,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Losing balance and moving backwards with both feet."
  },
  "008191": {
    "text": "the man puts something down and walks",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6440443153914812,
      "Minus Multimodal Distance": -9.483075141906738,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.058996595442295074,
      "MoBERT-F": 0.5024300177956176,
      "MoBERT-N": 0.46600022881186687,
      "MoBERT-min(F/N)": 0.46600022881186687,
      "MoBERT-max(F/N)": 0.5024300177956176,
      "MotionCritic": -4.944224834442139,
      "VeMo (human-opt view)": 0.39285714285714285,
      "VeMo (max entropy view)": 0.39285714285714285,
      "VeMo (min entropy view)": 0.164021164021164,
      "VeMo (random view)": 0.39285714285714285,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man puts something down and walks."
  },
  "013703": {
    "text": "this person is stretching his right leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6548998312165349,
      "Minus Multimodal Distance": -12.391310691833496,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00010386392386863008,
      "MoBERT-F": 0.48490864886214224,
      "MoBERT-N": 0.48319052599600665,
      "MoBERT-min(F/N)": 0.48319052599600665,
      "MoBERT-max(F/N)": 0.48490864886214224,
      "MotionCritic": -5.934438228607178,
      "VeMo (human-opt view)": 0.9875577288780223,
      "VeMo (max entropy view)": 0.9850317124735729,
      "VeMo (min entropy view)": 0.9875577288780223,
      "VeMo (random view)": 0.9850317124735729,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person is stretching his right leg."
  },
  "012956": {
    "text": "person seems to be preparing food",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.26229814670968604,
      "Minus Multimodal Distance": -9.448993682861328,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00011182921298313886,
      "MoBERT-F": 0.3195215175419561,
      "MoBERT-N": 0.3911807687272465,
      "MoBERT-min(F/N)": 0.3195215175419561,
      "MoBERT-max(F/N)": 0.3911807687272465,
      "MotionCritic": -9.16175651550293,
      "VeMo (human-opt view)": 2.152906078322723e-05,
      "VeMo (max entropy view)": 2.152906078322723e-05,
      "VeMo (min entropy view)": 1.0115274699143443e-05,
      "VeMo (random view)": 2.152906078322723e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person seems to be preparing food."
  },
  "013056": {
    "text": "a figure quickly walks forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.26311632649158156,
      "Minus Multimodal Distance": -10.1785306930542,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.449484236421995e-05,
      "MoBERT-F": 0.44391450421978984,
      "MoBERT-N": 0.4772732998667462,
      "MoBERT-min(F/N)": 0.44391450421978984,
      "MoBERT-max(F/N)": 0.4772732998667462,
      "MotionCritic": -11.58166217803955,
      "VeMo (human-opt view)": 0.9045592705167174,
      "VeMo (max entropy view)": 0.9045592705167174,
      "VeMo (min entropy view)": 0.9285714285714286,
      "VeMo (random view)": 0.9285714285714286,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure walks forward quickly."
  },
  "004012": {
    "text": "the person raises their left foot up to their kinee and then kicks their foot out,  then returns their foot to their knee.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4964822660045595,
      "Minus Multimodal Distance": -9.206721305847168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9905573129653931,
      "MoBERT-F": 0.7586773468490017,
      "MoBERT-N": 0.6761229471921535,
      "MoBERT-min(F/N)": 0.6761229471921535,
      "MoBERT-max(F/N)": 0.7586773468490017,
      "MotionCritic": -3.1610817909240723,
      "VeMo (human-opt view)": 0.8597748208802457,
      "VeMo (max entropy view)": 0.7878172588832487,
      "VeMo (min entropy view)": 0.8597748208802457,
      "VeMo (random view)": 0.7878172588832487,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person raises their left foot up to their knee and then kicks their foot out, then returns their foot to their knee."
  },
  "005674": {
    "text": "a person sits down, jiggles his right knee, raises both hands quickly to his shoulders then rubs his stomach with his left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8973234087364508,
      "Minus Multimodal Distance": -7.016880035400391,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0023729526437819004,
      "MoBERT-F": 0.416425075844025,
      "MoBERT-N": 0.49458711978873127,
      "MoBERT-min(F/N)": 0.416425075844025,
      "MoBERT-max(F/N)": 0.49458711978873127,
      "MotionCritic": -10.75085735321045,
      "VeMo (human-opt view)": 0.5918367346938775,
      "VeMo (max entropy view)": 0.5918367346938775,
      "VeMo (min entropy view)": 0.7436974789915967,
      "VeMo (random view)": 0.5918367346938775,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down, jiggles his right knee, raises both hands quickly to his shoulders, then rubs his stomach with his left hand."
  },
  "003108": {
    "text": "a person with a wide stance repeatedly lash out with each hand at something in front of them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6400753624958455,
      "Minus Multimodal Distance": -2.5607070922851562,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.420971552259289e-05,
      "MoBERT-F": 0.3490998344955847,
      "MoBERT-N": 0.4544970592833853,
      "MoBERT-min(F/N)": 0.3490998344955847,
      "MoBERT-max(F/N)": 0.4544970592833853,
      "MotionCritic": -3.9926834106445312,
      "VeMo (human-opt view)": 0.910158013544018,
      "VeMo (max entropy view)": 0.910158013544018,
      "VeMo (min entropy view)": 0.9324618736383442,
      "VeMo (random view)": 0.9324618736383442,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person with a wide stance repeatedly lashes out with each hand at something in front of them."
  },
  "011965": {
    "text": "a person abruptly stumbles forward and regains his balance as if he had been pushed from behind.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.29336680057104314,
      "Minus Multimodal Distance": -2.7930893898010254,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.295126589364372e-05,
      "MoBERT-F": 0.3268911558408845,
      "MoBERT-N": 0.4009326021742305,
      "MoBERT-min(F/N)": 0.3268911558408845,
      "MoBERT-max(F/N)": 0.4009326021742305,
      "MotionCritic": -1.3917723894119263,
      "VeMo (human-opt view)": 0.035303451011503374,
      "VeMo (max entropy view)": 0.28173374613003094,
      "VeMo (min entropy view)": 0.035303451011503374,
      "VeMo (random view)": 0.28173374613003094,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person abruptly stumbles forward and regains their balance as if they had been pushed from behind."
  },
  "011683": {
    "text": "a man lowers his arms and places his hands on his knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6428330698913759,
      "Minus Multimodal Distance": -6.339606285095215,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.8289970689220354e-05,
      "MoBERT-F": 0.3946076673395674,
      "MoBERT-N": 0.48844968817182843,
      "MoBERT-min(F/N)": 0.3946076673395674,
      "MoBERT-max(F/N)": 0.48844968817182843,
      "MotionCritic": -7.01470947265625,
      "VeMo (human-opt view)": 0.9841162554917202,
      "VeMo (max entropy view)": 0.9740501478156137,
      "VeMo (min entropy view)": 0.9841162554917202,
      "VeMo (random view)": 0.9841162554917202,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man lowers his arms and places his hands on his knees."
  },
  "005037": {
    "text": "the drunk guy struggles to walk down the street",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4390802235477689,
      "Minus Multimodal Distance": -10.179699897766113,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0008384158136323094,
      "MoBERT-F": 0.49907727186877165,
      "MoBERT-N": 0.5740719340836591,
      "MoBERT-min(F/N)": 0.49907727186877165,
      "MoBERT-max(F/N)": 0.5740719340836591,
      "MotionCritic": -2.8331961631774902,
      "VeMo (human-opt view)": 0.7875816993464052,
      "VeMo (max entropy view)": 0.7552447552447552,
      "VeMo (min entropy view)": 0.7875816993464052,
      "VeMo (random view)": 0.7875816993464052,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The drunk guy struggles to walk down the street."
  },
  "011716": {
    "text": "a person is climbing up a ladder and cleaning",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6446463161774422,
      "Minus Multimodal Distance": -6.991913795471191,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.567460834980011,
      "MoBERT-F": 0.7790556050978603,
      "MoBERT-N": 0.6104736725333346,
      "MoBERT-min(F/N)": 0.6104736725333346,
      "MoBERT-max(F/N)": 0.7790556050978603,
      "MotionCritic": -8.0564546585083,
      "VeMo (human-opt view)": 4.227079101743629e-06,
      "VeMo (max entropy view)": 5.426831540318032e-06,
      "VeMo (min entropy view)": 4.227079101743629e-06,
      "VeMo (random view)": 4.227079101743629e-06,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is climbing up a ladder and cleaning."
  },
  "012005": {
    "text": "a person bends down and crawls to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8228387826747604,
      "Minus Multimodal Distance": -6.742308616638184,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9913835525512695,
      "MoBERT-F": 0.82145932870217,
      "MoBERT-N": 0.7789244014553123,
      "MoBERT-min(F/N)": 0.7789244014553123,
      "MoBERT-max(F/N)": 0.82145932870217,
      "MotionCritic": -8.989299774169922,
      "VeMo (human-opt view)": 0.9147286821705426,
      "VeMo (max entropy view)": 0.9147286821705426,
      "VeMo (min entropy view)": 0.9740603493912123,
      "VeMo (random view)": 0.9740603493912123,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person bends down and crawls to the left."
  },
  "001527": {
    "text": "a man walks forward in a snake like pattern.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7716615547094743,
      "Minus Multimodal Distance": -1.9218394756317139,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0007408543024212122,
      "MoBERT-F": 0.4693028387462524,
      "MoBERT-N": 0.5036418551592333,
      "MoBERT-min(F/N)": 0.4693028387462524,
      "MoBERT-max(F/N)": 0.5036418551592333,
      "MotionCritic": -5.871033191680908,
      "VeMo (human-opt view)": 0.5301204819277109,
      "VeMo (max entropy view)": 0.5301204819277109,
      "VeMo (min entropy view)": 0.5778443113772455,
      "VeMo (random view)": 0.5778443113772455,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man walks forward in a snake-like pattern."
  },
  "010898": {
    "text": "a person kicks something with their right foot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5330997027905449,
      "Minus Multimodal Distance": -10.339564323425293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9906484484672546,
      "MoBERT-F": 0.8552702302617552,
      "MoBERT-N": 0.7014546051611408,
      "MoBERT-min(F/N)": 0.7014546051611408,
      "MoBERT-max(F/N)": 0.8552702302617552,
      "MotionCritic": -5.922544956207275,
      "VeMo (human-opt view)": 0.9867535287730728,
      "VeMo (max entropy view)": 0.9647528264242962,
      "VeMo (min entropy view)": 0.9867535287730728,
      "VeMo (random view)": 0.9647528264242962,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person kicks something with their right foot."
  },
  "001632": {
    "text": "a man sits down and then stays still.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6693911218673703,
      "Minus Multimodal Distance": -7.410311222076416,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.10193562507629395,
      "MoBERT-F": 0.4487971864441703,
      "MoBERT-N": 0.4056641168160684,
      "MoBERT-min(F/N)": 0.4056641168160684,
      "MoBERT-max(F/N)": 0.4487971864441703,
      "MotionCritic": -7.1233673095703125,
      "VeMo (human-opt view)": 0.8265835929387332,
      "VeMo (max entropy view)": 0.8265835929387332,
      "VeMo (min entropy view)": 0.8361344537815126,
      "VeMo (random view)": 0.8361344537815126,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man sits down and then stays still."
  },
  "008266": {
    "text": "walking to the side then the other.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9206095030297444,
      "Minus Multimodal Distance": -3.321408987045288,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.408275759080425e-05,
      "MoBERT-F": 0.38185599180585683,
      "MoBERT-N": 0.5633064079313406,
      "MoBERT-min(F/N)": 0.38185599180585683,
      "MoBERT-max(F/N)": 0.5633064079313406,
      "MotionCritic": -4.6740403175354,
      "VeMo (human-opt view)": 0.9808429118773946,
      "VeMo (max entropy view)": 0.9434467971053337,
      "VeMo (min entropy view)": 0.9808429118773946,
      "VeMo (random view)": 0.9434467971053337,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks to one side then the other."
  },
  "003171": {
    "text": "the person is walking down arms out.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5117131562670687,
      "Minus Multimodal Distance": -5.696864604949951,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0008954927325248718,
      "MoBERT-F": 0.6029325762701476,
      "MoBERT-N": 0.5385267803167783,
      "MoBERT-min(F/N)": 0.5385267803167783,
      "MoBERT-max(F/N)": 0.6029325762701476,
      "MotionCritic": -3.290250539779663,
      "VeMo (human-opt view)": 0.9859343603482921,
      "VeMo (max entropy view)": 0.9722849517552864,
      "VeMo (min entropy view)": 0.9859343603482921,
      "VeMo (random view)": 0.9722849517552864,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking down with arms out."
  },
  "005141": {
    "text": "jumping up in place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9065896649277433,
      "Minus Multimodal Distance": -5.267050266265869,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5925624370574951,
      "MoBERT-F": 0.7749696155603824,
      "MoBERT-N": 0.7330042895308851,
      "MoBERT-min(F/N)": 0.7330042895308851,
      "MoBERT-max(F/N)": 0.7749696155603824,
      "MotionCritic": -6.759605407714844,
      "VeMo (human-opt view)": 0.22203098106712565,
      "VeMo (max entropy view)": 0.22203098106712565,
      "VeMo (min entropy view)": 0.15599095704596835,
      "VeMo (random view)": 0.22203098106712565,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is jumping up in place."
  },
  "014185": {
    "text": "a person who is standing on a balance beam takes three steps forward and then steps down off the beam.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3894646272288217,
      "Minus Multimodal Distance": -9.392045021057129,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3815247914171778e-05,
      "MoBERT-F": 0.40615672431797045,
      "MoBERT-N": 0.537532070614084,
      "MoBERT-min(F/N)": 0.40615672431797045,
      "MoBERT-max(F/N)": 0.537532070614084,
      "MotionCritic": -5.4256181716918945,
      "VeMo (human-opt view)": 0.21189591078066913,
      "VeMo (max entropy view)": 0.21189591078066913,
      "VeMo (min entropy view)": 0.13994910941475827,
      "VeMo (random view)": 0.21189591078066913,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who is standing on a balance beam takes three steps forward and then steps down off the beam."
  },
  "004864": {
    "text": "a person is in a fighting stance with their legs spread and fists raised. they hop forward and kick out with their left foot before returning to their original position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7036386394674484,
      "Minus Multimodal Distance": -3.788428544998169,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9868521094322205,
      "MoBERT-F": 0.7528404744029462,
      "MoBERT-N": 0.6724415160079315,
      "MoBERT-min(F/N)": 0.6724415160079315,
      "MoBERT-max(F/N)": 0.7528404744029462,
      "MotionCritic": -6.0400919914245605,
      "VeMo (human-opt view)": 0.9362928797924474,
      "VeMo (max entropy view)": 0.9362928797924474,
      "VeMo (min entropy view)": 0.9554615576013934,
      "VeMo (random view)": 0.9362928797924474,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is in a fighting stance, with their legs spread and fists raised. They hop forward, kick out with their left foot, and then return to their original position."
  },
  "010529": {
    "text": "person is walking forward while leaning on something with the left arm, then they stop and bring their arms out to the side",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.35858863740759594,
      "Minus Multimodal Distance": -2.3304026126861572,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9611033824039623e-05,
      "MoBERT-F": 0.34279011838234597,
      "MoBERT-N": 0.4765467109829945,
      "MoBERT-min(F/N)": 0.34279011838234597,
      "MoBERT-max(F/N)": 0.4765467109829945,
      "MotionCritic": -12.927616119384766,
      "VeMo (human-opt view)": 0.8441961514587213,
      "VeMo (max entropy view)": 0.8441961514587213,
      "VeMo (min entropy view)": 0.8594594594594595,
      "VeMo (random view)": 0.8441961514587213,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking forward while leaning on something with their left arm. Then, they stop and extend their arms out to the sides."
  },
  "011652": {
    "text": "a person jumps in the air, then abruptly stumbles to his left as if he had been pushed, and finally he regains his balance.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.28688903406645433,
      "Minus Multimodal Distance": -8.743722915649414,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.09649504721164703,
      "MoBERT-F": 0.5876621018355647,
      "MoBERT-N": 0.5921148547742512,
      "MoBERT-min(F/N)": 0.5876621018355647,
      "MoBERT-max(F/N)": 0.5921148547742512,
      "MotionCritic": -13.849465370178223,
      "VeMo (human-opt view)": 0.5463576158940397,
      "VeMo (max entropy view)": 0.5463576158940397,
      "VeMo (min entropy view)": 0.42346938775510207,
      "VeMo (random view)": 0.42346938775510207,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person jumps into the air, then abruptly stumbles to his left as if he had been pushed, and finally, he regains his balance."
  },
  "002235": {
    "text": "person leans forward goes onto knees whilst first putting left hand on ground for support and stays on knees",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.52760191632071,
      "Minus Multimodal Distance": -7.620486736297607,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.27133265137672424,
      "MoBERT-F": 0.5302318986558416,
      "MoBERT-N": 0.555360132821511,
      "MoBERT-min(F/N)": 0.5302318986558416,
      "MoBERT-max(F/N)": 0.555360132821511,
      "MotionCritic": -6.720884799957275,
      "VeMo (human-opt view)": 0.9722849517552864,
      "VeMo (max entropy view)": 0.9722849517552864,
      "VeMo (min entropy view)": 0.9820419863417924,
      "VeMo (random view)": 0.9722849517552864,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person leans forward, goes down onto their knees, first placing their left hand on the ground for support, and then remains on their knees."
  },
  "010648": {
    "text": "a man steps forward, then suddenly staggers to his left, before returning to his original course.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4238949124541332,
      "Minus Multimodal Distance": -5.339198589324951,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0002733591536525637,
      "MoBERT-F": 0.43755535452843447,
      "MoBERT-N": 0.5868691362173617,
      "MoBERT-min(F/N)": 0.43755535452843447,
      "MoBERT-max(F/N)": 0.5868691362173617,
      "MotionCritic": -6.352097034454346,
      "VeMo (human-opt view)": 0.7053571428571429,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.7053571428571429,
      "VeMo (random view)": 0.5,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man steps forward, then suddenly staggers to his left before returning to his original course."
  },
  "006132": {
    "text": "the man holds something above his left shoulder and rubs it with his right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2558614234796053,
      "Minus Multimodal Distance": -13.497509002685547,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.5699136282783e-05,
      "MoBERT-F": 0.24508201294130744,
      "MoBERT-N": 0.3352702064983324,
      "MoBERT-min(F/N)": 0.24508201294130744,
      "MoBERT-max(F/N)": 0.3352702064983324,
      "MotionCritic": -8.555830001831055,
      "VeMo (human-opt view)": 0.8519362186788155,
      "VeMo (max entropy view)": 0.7547169811320755,
      "VeMo (min entropy view)": 0.8519362186788155,
      "VeMo (random view)": 0.7547169811320755,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man holds something above his left shoulder and rubs it with his right hand."
  },
  "000571": {
    "text": "a person loses his balance towards his right and then steps back towards his left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3522364206964082,
      "Minus Multimodal Distance": -11.228069305419922,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.122058402979746e-05,
      "MoBERT-F": 0.42258347713122757,
      "MoBERT-N": 0.52216810207927,
      "MoBERT-min(F/N)": 0.42258347713122757,
      "MoBERT-max(F/N)": 0.52216810207927,
      "MotionCritic": -8.120616912841797,
      "VeMo (human-opt view)": 0.9103078982597055,
      "VeMo (max entropy view)": 0.9099041533546326,
      "VeMo (min entropy view)": 0.9103078982597055,
      "VeMo (random view)": 0.9099041533546326,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person loses their balance to the right and then steps back to the left."
  },
  "012805": {
    "text": "a man crouches down while quickly walking forward and then stands up straight.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4583651165023028,
      "Minus Multimodal Distance": -6.777015686035156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.11586545407772064,
      "MoBERT-F": 0.5758820931834511,
      "MoBERT-N": 0.5110674142993189,
      "MoBERT-min(F/N)": 0.5110674142993189,
      "MoBERT-max(F/N)": 0.5758820931834511,
      "MotionCritic": -1.5055168867111206,
      "VeMo (human-opt view)": 0.9197751660705161,
      "VeMo (max entropy view)": 0.8813114754098361,
      "VeMo (min entropy view)": 0.9197751660705161,
      "VeMo (random view)": 0.8813114754098361,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man crouches down while quickly walking forward and then stands up straight."
  },
  "003812": {
    "text": "a person whos put both hands together and is begging",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5607679410302648,
      "Minus Multimodal Distance": -7.400634288787842,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6008970962720923e-05,
      "MoBERT-F": 0.4146602432081154,
      "MoBERT-N": 0.439346740757675,
      "MoBERT-min(F/N)": 0.4146602432081154,
      "MoBERT-max(F/N)": 0.439346740757675,
      "MotionCritic": -1.6459338665008545,
      "VeMo (human-opt view)": 0.964727163099186,
      "VeMo (max entropy view)": 0.9467107680999632,
      "VeMo (min entropy view)": 0.964727163099186,
      "VeMo (random view)": 0.9467107680999632,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person has put both hands together and is begging."
  },
  "008955": {
    "text": "man left foot stump one then stay in standing position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.32521532064235537,
      "Minus Multimodal Distance": -10.349919319152832,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 7.021361670922488e-05,
      "MoBERT-F": 0.3624402469099017,
      "MoBERT-N": 0.4699883422530869,
      "MoBERT-min(F/N)": 0.3624402469099017,
      "MoBERT-max(F/N)": 0.4699883422530869,
      "MotionCritic": -6.707151889801025,
      "VeMo (human-opt view)": 0.8077858880778589,
      "VeMo (max entropy view)": 0.7980769230769231,
      "VeMo (min entropy view)": 0.8077858880778589,
      "VeMo (random view)": 0.7980769230769231,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man stomps his left foot once and then stays in a standing position."
  },
  "000072": {
    "text": "a person throws and upper cut with his right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7053402094350345,
      "Minus Multimodal Distance": -9.057234764099121,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.4301075041294098,
      "MoBERT-F": 0.727623953901211,
      "MoBERT-N": 0.7427147581647555,
      "MoBERT-min(F/N)": 0.727623953901211,
      "MoBERT-max(F/N)": 0.7427147581647555,
      "MotionCritic": -6.543031215667725,
      "VeMo (human-opt view)": 0.8736532810969637,
      "VeMo (max entropy view)": 0.8352835283528353,
      "VeMo (min entropy view)": 0.8736532810969637,
      "VeMo (random view)": 0.8736532810969637,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person throws an uppercut with his right hand."
  },
  "001906": {
    "text": "the person was laying  down and then they got up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4175706012465652,
      "Minus Multimodal Distance": -6.365607261657715,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9929758310317993,
      "MoBERT-F": 0.6318670600803011,
      "MoBERT-N": 0.49370447068343626,
      "MoBERT-min(F/N)": 0.49370447068343626,
      "MoBERT-max(F/N)": 0.6318670600803011,
      "MotionCritic": -4.993841648101807,
      "VeMo (human-opt view)": 0.9944465226023652,
      "VeMo (max entropy view)": 0.9914443375689769,
      "VeMo (min entropy view)": 0.9944465226023652,
      "VeMo (random view)": 0.9914443375689769,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person was lying down and then they got up."
  },
  "004495": {
    "text": "a person turns to his left and looks around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7712110540566081,
      "Minus Multimodal Distance": -8.804930686950684,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.520642192394007e-05,
      "MoBERT-F": 0.35852844251189736,
      "MoBERT-N": 0.50687392123188,
      "MoBERT-min(F/N)": 0.35852844251189736,
      "MoBERT-max(F/N)": 0.50687392123188,
      "MotionCritic": -14.250354766845703,
      "VeMo (human-opt view)": 0.9101251422070534,
      "VeMo (max entropy view)": 0.9101251422070534,
      "VeMo (min entropy view)": 0.9466089466089466,
      "VeMo (random view)": 0.9101251422070534,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person turns to his left and looks around."
  },
  "003761": {
    "text": "a person sitting and readjusting position to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8255150722434714,
      "Minus Multimodal Distance": -4.78424072265625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.6743199825286865,
      "MoBERT-F": 0.5197897826068296,
      "MoBERT-N": 0.38513067693547876,
      "MoBERT-min(F/N)": 0.38513067693547876,
      "MoBERT-max(F/N)": 0.5197897826068296,
      "MotionCritic": -9.238105773925781,
      "VeMo (human-opt view)": 0.4833948339483395,
      "VeMo (max entropy view)": 0.4833948339483395,
      "VeMo (min entropy view)": 0.40589569160997735,
      "VeMo (random view)": 0.4833948339483395,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting and readjusting their position to the left."
  },
  "012145": {
    "text": "a person is clapping his hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.26460276087347906,
      "Minus Multimodal Distance": -9.466546058654785,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8137470508227125e-05,
      "MoBERT-F": 0.32230650886772183,
      "MoBERT-N": 0.4201907414158352,
      "MoBERT-min(F/N)": 0.32230650886772183,
      "MoBERT-max(F/N)": 0.4201907414158352,
      "MotionCritic": -3.5674266815185547,
      "VeMo (human-opt view)": 0.9980765083883211,
      "VeMo (max entropy view)": 0.9466764061358656,
      "VeMo (min entropy view)": 0.9980765083883211,
      "VeMo (random view)": 0.9466764061358656,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is clapping their hands."
  },
  "013315": {
    "text": "the man is boxing upwards",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9989284619289989,
      "Minus Multimodal Distance": -8.545414924621582,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.146260107518174e-05,
      "MoBERT-F": 0.3363394554718405,
      "MoBERT-N": 0.4263491229217984,
      "MoBERT-min(F/N)": 0.3363394554718405,
      "MoBERT-max(F/N)": 0.4263491229217984,
      "MotionCritic": -5.22101354598999,
      "VeMo (human-opt view)": 0.8080808080808081,
      "VeMo (max entropy view)": 0.8080808080808081,
      "VeMo (min entropy view)": 0.9049773755656109,
      "VeMo (random view)": 0.9049773755656109,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is boxing upwards."
  },
  "006263": {
    "text": "walking backwards and then sitting.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5493559588483217,
      "Minus Multimodal Distance": -5.941439628601074,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9942435622215271,
      "MoBERT-F": 0.6592661066146976,
      "MoBERT-N": 0.6003973210399753,
      "MoBERT-min(F/N)": 0.6003973210399753,
      "MoBERT-max(F/N)": 0.6592661066146976,
      "MotionCritic": -4.738283634185791,
      "VeMo (human-opt view)": 0.8738738738738738,
      "VeMo (max entropy view)": 0.7976307996051333,
      "VeMo (min entropy view)": 0.8738738738738738,
      "VeMo (random view)": 0.8738738738738738,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking backwards and then sitting."
  },
  "005744": {
    "text": "man stands straight up with his hands out in front of him and creates a patting motion in the air continuously.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3007248295208364,
      "Minus Multimodal Distance": -9.092549324035645,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.320508742239326e-05,
      "MoBERT-F": 0.4070767076805867,
      "MoBERT-N": 0.44339696421342084,
      "MoBERT-min(F/N)": 0.4070767076805867,
      "MoBERT-max(F/N)": 0.44339696421342084,
      "MotionCritic": -7.9339070320129395,
      "VeMo (human-opt view)": 0.7983193277310925,
      "VeMo (max entropy view)": 0.39226519337016574,
      "VeMo (min entropy view)": 0.7983193277310925,
      "VeMo (random view)": 0.39226519337016574,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man stands straight up with his hands out in front of him and continuously creates a patting motion in the air."
  },
  "000687": {
    "text": "a person takes in big steps in a hurry walking into the rectangular area while hands are dangling and swinging.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7527941927042676,
      "Minus Multimodal Distance": -9.103602409362793,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.6598194241523743,
      "MoBERT-F": 0.7643878411817514,
      "MoBERT-N": 0.623783867928994,
      "MoBERT-min(F/N)": 0.623783867928994,
      "MoBERT-max(F/N)": 0.7643878411817514,
      "MotionCritic": -11.443486213684082,
      "VeMo (human-opt view)": 0.36344969199178645,
      "VeMo (max entropy view)": 0.36344969199178645,
      "VeMo (min entropy view)": 0.2682926829268293,
      "VeMo (random view)": 0.2682926829268293,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person takes big steps in a hurry, walking into the rectangular area while their hands are dangling and swinging."
  },
  "001483": {
    "text": "someone rubs their belly with their left hand and rubs their head with their right hand at the same time.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3175326384596919,
      "Minus Multimodal Distance": -7.764975070953369,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.877560397493653e-05,
      "MoBERT-F": 0.2722393006705879,
      "MoBERT-N": 0.3302655130385139,
      "MoBERT-min(F/N)": 0.2722393006705879,
      "MoBERT-max(F/N)": 0.3302655130385139,
      "MotionCritic": -6.28099250793457,
      "VeMo (human-opt view)": 0.21177802944507362,
      "VeMo (max entropy view)": 0.21177802944507362,
      "VeMo (min entropy view)": 0.007089832346317457,
      "VeMo (random view)": 0.21177802944507362,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone rubs their belly with their left hand and their head with their right hand at the same time."
  },
  "002317": {
    "text": "a person steps around something then sits down on the item and rests each hand on the corresponding thigh",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6889947594255581,
      "Minus Multimodal Distance": -4.535271644592285,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.920525730587542e-05,
      "MoBERT-F": 0.2970578776982122,
      "MoBERT-N": 0.4843718037795881,
      "MoBERT-min(F/N)": 0.2970578776982122,
      "MoBERT-max(F/N)": 0.4843718037795881,
      "MotionCritic": -12.463213920593262,
      "VeMo (human-opt view)": 0.8353808353808354,
      "VeMo (max entropy view)": 0.7548076923076923,
      "VeMo (min entropy view)": 0.8353808353808354,
      "VeMo (random view)": 0.8353808353808354,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person steps around something, then sits down on the item and rests each hand on the corresponding thigh."
  },
  "005799": {
    "text": "man walks forward while upper body is leaning slightly to the left and steps are unbalanced and slow.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4256332413143518,
      "Minus Multimodal Distance": -2.559309959411621,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 7.754291436867788e-05,
      "MoBERT-F": 0.35825486639774085,
      "MoBERT-N": 0.44238117407130595,
      "MoBERT-min(F/N)": 0.35825486639774085,
      "MoBERT-max(F/N)": 0.44238117407130595,
      "MotionCritic": -4.938415050506592,
      "VeMo (human-opt view)": 0.6371191135734072,
      "VeMo (max entropy view)": 0.6371191135734072,
      "VeMo (min entropy view)": 0.6666666666666666,
      "VeMo (random view)": 0.6371191135734072,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man walks forward with his upper body leaning slightly to the left. His steps are unbalanced and slow."
  },
  "014603": {
    "text": "a man raises his hands up on the air,starts clapping and then lowers them down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.48241016040847895,
      "Minus Multimodal Distance": -8.723854064941406,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.917675556091126e-05,
      "MoBERT-F": 0.30927433599736703,
      "MoBERT-N": 0.4478923867441678,
      "MoBERT-min(F/N)": 0.30927433599736703,
      "MoBERT-max(F/N)": 0.4478923867441678,
      "MotionCritic": -6.215295314788818,
      "VeMo (human-opt view)": 0.9961697517205598,
      "VeMo (max entropy view)": 0.9647517624118794,
      "VeMo (min entropy view)": 0.9961697517205598,
      "VeMo (random view)": 0.9961697517205598,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man raises his hands up in the air, starts clapping, and then lowers them down."
  },
  "002931": {
    "text": "a person grab with hands something and carries over to the other place",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8585901664416205,
      "Minus Multimodal Distance": -12.16244888305664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00843844935297966,
      "MoBERT-F": 0.549541700894935,
      "MoBERT-N": 0.446860996331179,
      "MoBERT-min(F/N)": 0.446860996331179,
      "MoBERT-max(F/N)": 0.549541700894935,
      "MotionCritic": -8.50436782836914,
      "VeMo (human-opt view)": 0.22291407222914073,
      "VeMo (max entropy view)": 0.22291407222914073,
      "VeMo (min entropy view)": 0.033128834355828224,
      "VeMo (random view)": 0.22291407222914073,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person grabs something with their hands and carries it to another place."
  },
  "009511": {
    "text": "figure does a quick small jump and then walks fowardsd and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.32649057717632907,
      "Minus Multimodal Distance": -12.559261322021484,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9758390188217163,
      "MoBERT-F": 0.6278281806935685,
      "MoBERT-N": 0.5102940538374736,
      "MoBERT-min(F/N)": 0.5102940538374736,
      "MoBERT-max(F/N)": 0.6278281806935685,
      "MotionCritic": -8.799497604370117,
      "VeMo (human-opt view)": 0.5472636815920398,
      "VeMo (max entropy view)": 0.5472636815920398,
      "VeMo (min entropy view)": 0.5617529880478087,
      "VeMo (random view)": 0.5472636815920398,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure does a quick small jump, then walks forward and then stops."
  },
  "000759": {
    "text": "a person walks unbalanced as if they are on a tight rope.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5916093801917058,
      "Minus Multimodal Distance": -13.964752197265625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.864116661134176e-05,
      "MoBERT-F": 0.37421640499954967,
      "MoBERT-N": 0.47572412311797135,
      "MoBERT-min(F/N)": 0.37421640499954967,
      "MoBERT-max(F/N)": 0.47572412311797135,
      "MotionCritic": -6.042303562164307,
      "VeMo (human-opt view)": 0.8809648662821186,
      "VeMo (max entropy view)": 0.7774193548387097,
      "VeMo (min entropy view)": 0.8809648662821186,
      "VeMo (random view)": 0.7774193548387097,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks unsteadily as if they are on a tightrope."
  },
  "008296": {
    "text": "the man dances his feet in circles in front of himself.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1061826953160827,
      "Minus Multimodal Distance": -3.990093946456909,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.834898441506084e-05,
      "MoBERT-F": 0.5298756617586988,
      "MoBERT-N": 0.5742371184707873,
      "MoBERT-min(F/N)": 0.5298756617586988,
      "MoBERT-max(F/N)": 0.5742371184707873,
      "MotionCritic": -7.685494422912598,
      "VeMo (human-opt view)": 0.1645274212368728,
      "VeMo (max entropy view)": 0.2693498452012384,
      "VeMo (min entropy view)": 0.1645274212368728,
      "VeMo (random view)": 0.1645274212368728,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man dances with his feet in circles in front of himself."
  },
  "009709": {
    "text": "the man in a fighting stance turns around to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6733849341471247,
      "Minus Multimodal Distance": -9.533299446105957,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00010535389446886256,
      "MoBERT-F": 0.418121373925863,
      "MoBERT-N": 0.5144445547310351,
      "MoBERT-min(F/N)": 0.418121373925863,
      "MoBERT-max(F/N)": 0.5144445547310351,
      "MotionCritic": -8.490203857421875,
      "VeMo (human-opt view)": 0.97556434721899,
      "VeMo (max entropy view)": 0.9688722571865963,
      "VeMo (min entropy view)": 0.97556434721899,
      "VeMo (random view)": 0.9688722571865963,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man in a fighting stance turns around to the right."
  },
  "012679": {
    "text": "a person kneels down firstly on his right, then his left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5160288832261168,
      "Minus Multimodal Distance": -4.50412654876709,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9723061323165894,
      "MoBERT-F": 0.6525434615857326,
      "MoBERT-N": 0.5202382370788476,
      "MoBERT-min(F/N)": 0.5202382370788476,
      "MoBERT-max(F/N)": 0.6525434615857326,
      "MotionCritic": -2.732475996017456,
      "VeMo (human-opt view)": 0.874430709173715,
      "VeMo (max entropy view)": 0.874430709173715,
      "VeMo (min entropy view)": 0.8992460589444825,
      "VeMo (random view)": 0.8992460589444825,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person kneels down first on his right knee, then on his left knee."
  },
  "008401": {
    "text": "person squats then rotates a quarter of the way clockwise.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.071927180832748,
      "Minus Multimodal Distance": -11.654467582702637,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.943596882687416e-05,
      "MoBERT-F": 0.31912105087765186,
      "MoBERT-N": 0.36980982465104995,
      "MoBERT-min(F/N)": 0.31912105087765186,
      "MoBERT-max(F/N)": 0.36980982465104995,
      "MotionCritic": -4.878489971160889,
      "VeMo (human-opt view)": 0.9669134310318779,
      "VeMo (max entropy view)": 0.8934010152284264,
      "VeMo (min entropy view)": 0.9669134310318779,
      "VeMo (random view)": 0.8934010152284264,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats then rotates a quarter of the way clockwise."
  },
  "012567": {
    "text": "a person rests their hands on their knees while squatting.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.365950197888405,
      "Minus Multimodal Distance": -5.073536396026611,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.000230560137424618,
      "MoBERT-F": 0.45777308999807603,
      "MoBERT-N": 0.44099963790812635,
      "MoBERT-min(F/N)": 0.44099963790812635,
      "MoBERT-max(F/N)": 0.45777308999807603,
      "MotionCritic": -8.743661880493164,
      "VeMo (human-opt view)": 0.9841009181159961,
      "VeMo (max entropy view)": 0.9795536861782919,
      "VeMo (min entropy view)": 0.9841009181159961,
      "VeMo (random view)": 0.9795536861782919,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person rests their hands on their knees while squatting."
  },
  "012968": {
    "text": "this person zig zags forward then stops to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6617948992365515,
      "Minus Multimodal Distance": -12.202686309814453,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0002525209274608642,
      "MoBERT-F": 0.526553666404518,
      "MoBERT-N": 0.6003614697973945,
      "MoBERT-min(F/N)": 0.526553666404518,
      "MoBERT-max(F/N)": 0.6003614697973945,
      "MotionCritic": -0.5998103022575378,
      "VeMo (human-opt view)": 0.638121546961326,
      "VeMo (max entropy view)": 0.638121546961326,
      "VeMo (min entropy view)": 0.8171673819742489,
      "VeMo (random view)": 0.638121546961326,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person zigzags forward, then stops to the right."
  },
  "013898": {
    "text": "a person walks forward and to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6233414908584123,
      "Minus Multimodal Distance": -11.333640098571777,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.54891456279438e-05,
      "MoBERT-F": 0.38668031822481536,
      "MoBERT-N": 0.5379178034902777,
      "MoBERT-min(F/N)": 0.38668031822481536,
      "MoBERT-max(F/N)": 0.5379178034902777,
      "MotionCritic": -10.742754936218262,
      "VeMo (human-opt view)": 0.8872987477638641,
      "VeMo (max entropy view)": 0.8806963645673324,
      "VeMo (min entropy view)": 0.8872987477638641,
      "VeMo (random view)": 0.8872987477638641,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and to the left."
  },
  "006640": {
    "text": "a person grabbed something and throw it away",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5845078119608853,
      "Minus Multimodal Distance": -8.0304594039917,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9815624952316284,
      "MoBERT-F": 0.7376855383550901,
      "MoBERT-N": 0.7394643925323781,
      "MoBERT-min(F/N)": 0.7376855383550901,
      "MoBERT-max(F/N)": 0.7394643925323781,
      "MotionCritic": -5.250098705291748,
      "VeMo (human-opt view)": 0.7773972602739726,
      "VeMo (max entropy view)": 0.7773972602739726,
      "VeMo (min entropy view)": 0.8740894901144641,
      "VeMo (random view)": 0.7773972602739726,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person grabbed something and threw it away."
  },
  "008053": {
    "text": "the person is cleaning up some thing",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.647836995879883,
      "Minus Multimodal Distance": -5.983345985412598,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3721586330793798e-05,
      "MoBERT-F": 0.38461184449932173,
      "MoBERT-N": 0.46306642119348573,
      "MoBERT-min(F/N)": 0.38461184449932173,
      "MoBERT-max(F/N)": 0.46306642119348573,
      "MotionCritic": -5.875113010406494,
      "VeMo (human-opt view)": 0.6224188790560472,
      "VeMo (max entropy view)": 0.6224188790560472,
      "VeMo (min entropy view)": 0.28205128205128205,
      "VeMo (random view)": 0.6224188790560472,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is cleaning up something."
  },
  "008518": {
    "text": "person is leaving in a circular motion.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9228878431769678,
      "Minus Multimodal Distance": -11.915959358215332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9185073799453676e-05,
      "MoBERT-F": 0.4816088483888549,
      "MoBERT-N": 0.6121243327828887,
      "MoBERT-min(F/N)": 0.4816088483888549,
      "MoBERT-max(F/N)": 0.6121243327828887,
      "MotionCritic": -13.316892623901367,
      "VeMo (human-opt view)": 0.515625,
      "VeMo (max entropy view)": 0.515625,
      "VeMo (min entropy view)": 0.37751004016064255,
      "VeMo (random view)": 0.37751004016064255,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is moving in a circular motion."
  },
  "009689": {
    "text": "a person walks forwards, sits.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6649153401267047,
      "Minus Multimodal Distance": -4.33140754699707,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.4945649278815836e-05,
      "MoBERT-F": 0.4013900446403277,
      "MoBERT-N": 0.4716370996001338,
      "MoBERT-min(F/N)": 0.4013900446403277,
      "MoBERT-max(F/N)": 0.4716370996001338,
      "MotionCritic": -6.352774143218994,
      "VeMo (human-opt view)": 0.9944506104328524,
      "VeMo (max entropy view)": 0.7984570877531341,
      "VeMo (min entropy view)": 0.9944506104328524,
      "VeMo (random view)": 0.9944506104328524,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and sits."
  },
  "004284": {
    "text": "a person walks with side steps to the right and then walks forward, then turn around and walks back",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.779686708465875,
      "Minus Multimodal Distance": -10.456928253173828,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4906803446356207e-05,
      "MoBERT-F": 0.38516547345889773,
      "MoBERT-N": 0.6067689324154597,
      "MoBERT-min(F/N)": 0.38516547345889773,
      "MoBERT-max(F/N)": 0.6067689324154597,
      "MotionCritic": -13.54570484161377,
      "VeMo (human-opt view)": 0.8170212765957446,
      "VeMo (max entropy view)": 0.6781857451403888,
      "VeMo (min entropy view)": 0.8170212765957446,
      "VeMo (random view)": 0.8170212765957446,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks with side steps to the right, then walks forward, then turns around and walks back."
  },
  "004336": {
    "text": "a person throws things to the right; first something underhand, then something overhand, and then something very far.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8832014878710275,
      "Minus Multimodal Distance": -2.479867935180664,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.806983608636074e-05,
      "MoBERT-F": 0.4257514437342955,
      "MoBERT-N": 0.47313569329790484,
      "MoBERT-min(F/N)": 0.4257514437342955,
      "MoBERT-max(F/N)": 0.47313569329790484,
      "MotionCritic": -4.831946849822998,
      "VeMo (human-opt view)": 0.6072106261859582,
      "VeMo (max entropy view)": 0.6072106261859582,
      "VeMo (min entropy view)": 0.7658536585365854,
      "VeMo (random view)": 0.6072106261859582,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person throws things to the right: first something underhand, then something overhand, and then something very far."
  },
  "013974": {
    "text": "a person is standing and adjusts their weight to lean more on their left side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.17352762800597674,
      "Minus Multimodal Distance": -4.409892559051514,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.5541776014724746e-05,
      "MoBERT-F": 0.2853461837312851,
      "MoBERT-N": 0.38896796713234766,
      "MoBERT-min(F/N)": 0.2853461837312851,
      "MoBERT-max(F/N)": 0.38896796713234766,
      "MotionCritic": -4.282907962799072,
      "VeMo (human-opt view)": 0.25646123260437376,
      "VeMo (max entropy view)": 0.25646123260437376,
      "VeMo (min entropy view)": 0.2454780361757106,
      "VeMo (random view)": 0.25646123260437376,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing and adjusts their weight to lean more on their left side."
  },
  "007628": {
    "text": "person moves forward two steps then does a full turn and faces opposite direction",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.332303387216447,
      "Minus Multimodal Distance": -11.29268741607666,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0007639435934834182,
      "MoBERT-F": 0.522281596702423,
      "MoBERT-N": 0.6671582598142051,
      "MoBERT-min(F/N)": 0.522281596702423,
      "MoBERT-max(F/N)": 0.6671582598142051,
      "MotionCritic": -13.496236801147461,
      "VeMo (human-opt view)": 0.967062818336163,
      "VeMo (max entropy view)": 0.7878211227402474,
      "VeMo (min entropy view)": 0.967062818336163,
      "VeMo (random view)": 0.967062818336163,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves forward two steps, then does a full turn and faces the opposite direction."
  },
  "006774": {
    "text": "a person raised the hand and start to make some pushs",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7509563284115647,
      "Minus Multimodal Distance": -10.64151382446289,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.719965363619849e-05,
      "MoBERT-F": 0.2634004583299878,
      "MoBERT-N": 0.3520945070206167,
      "MoBERT-min(F/N)": 0.2634004583299878,
      "MoBERT-max(F/N)": 0.3520945070206167,
      "MotionCritic": -10.054988861083984,
      "VeMo (human-opt view)": 0.8670181605155243,
      "VeMo (max entropy view)": 0.8670181605155243,
      "VeMo (min entropy view)": 0.9197080291970803,
      "VeMo (random view)": 0.9197080291970803,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raised the hand and started to make some pushes."
  },
  "008055": {
    "text": "a figure raises their right hand in a sweeping motion",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.24491866048934277,
      "Minus Multimodal Distance": -6.923983573913574,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.950820246245712e-05,
      "MoBERT-F": 0.3002441580105991,
      "MoBERT-N": 0.35819348758352426,
      "MoBERT-min(F/N)": 0.3002441580105991,
      "MoBERT-max(F/N)": 0.35819348758352426,
      "MotionCritic": -9.80436897277832,
      "VeMo (human-opt view)": 0.651685393258427,
      "VeMo (max entropy view)": 0.651685393258427,
      "VeMo (min entropy view)": 0.6520874751491054,
      "VeMo (random view)": 0.651685393258427,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure raises their right hand in a sweeping motion."
  },
  "001636": {
    "text": "a person steps to their left and sits down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5645930966435229,
      "Minus Multimodal Distance": -1.5791070461273193,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.05824572592973709,
      "MoBERT-F": 0.5132562053965252,
      "MoBERT-N": 0.528282687065267,
      "MoBERT-min(F/N)": 0.5132562053965252,
      "MoBERT-max(F/N)": 0.528282687065267,
      "MotionCritic": -9.817499160766602,
      "VeMo (human-opt view)": 0.9580386610089581,
      "VeMo (max entropy view)": 0.7311827956989247,
      "VeMo (min entropy view)": 0.9580386610089581,
      "VeMo (random view)": 0.7311827956989247,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps to their left and sits down."
  },
  "003583": {
    "text": "a person waves both arms in the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6851459612110529,
      "Minus Multimodal Distance": -8.921048164367676,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.654848140082322e-05,
      "MoBERT-F": 0.3680918620319985,
      "MoBERT-N": 0.42711339495011447,
      "MoBERT-min(F/N)": 0.3680918620319985,
      "MoBERT-max(F/N)": 0.42711339495011447,
      "MotionCritic": -6.338477611541748,
      "VeMo (human-opt view)": 0.9941032647777938,
      "VeMo (max entropy view)": 0.9924190213645762,
      "VeMo (min entropy view)": 0.9941032647777938,
      "VeMo (random view)": 0.9941032647777938,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person waves both arms in the air."
  },
  "000490": {
    "text": "a person punches the air with their arms.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7330773041204999,
      "Minus Multimodal Distance": -11.413226127624512,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.809971738315653e-05,
      "MoBERT-F": 0.4251293715377538,
      "MoBERT-N": 0.4342497926922855,
      "MoBERT-min(F/N)": 0.4251293715377538,
      "MoBERT-max(F/N)": 0.4342497926922855,
      "MotionCritic": -7.278678894042969,
      "VeMo (human-opt view)": 0.8593091828138163,
      "VeMo (max entropy view)": 0.8593091828138163,
      "VeMo (min entropy view)": 0.9152086137281292,
      "VeMo (random view)": 0.9152086137281292,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person punches the air with their arms."
  },
  "008077": {
    "text": "a person walks a path that bends left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.29035363711032136,
      "Minus Multimodal Distance": -5.920461654663086,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8780139473383315e-05,
      "MoBERT-F": 0.4071198336705064,
      "MoBERT-N": 0.5143651052413364,
      "MoBERT-min(F/N)": 0.4071198336705064,
      "MoBERT-max(F/N)": 0.5143651052413364,
      "MotionCritic": -10.414401054382324,
      "VeMo (human-opt view)": 0.9048473967684022,
      "VeMo (max entropy view)": 0.8740458015267175,
      "VeMo (min entropy view)": 0.9048473967684022,
      "VeMo (random view)": 0.8740458015267175,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks along a path that bends to the left."
  },
  "007890": {
    "text": "a person glides in circles in a counter-clockwise.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4398827430013628,
      "Minus Multimodal Distance": -4.379891395568848,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.913923501386307e-05,
      "MoBERT-F": 0.4487915011443586,
      "MoBERT-N": 0.5299953072424519,
      "MoBERT-min(F/N)": 0.4487915011443586,
      "MoBERT-max(F/N)": 0.5299953072424519,
      "MotionCritic": -11.75221061706543,
      "VeMo (human-opt view)": 0.002977292112589922,
      "VeMo (max entropy view)": 0.00856297680211739,
      "VeMo (min entropy view)": 0.002977292112589922,
      "VeMo (random view)": 0.002977292112589922,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person glides in circles in a counter - clockwise direction."
  },
  "000954": {
    "text": "a person is swinging a tennis racket.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9145362189187286,
      "Minus Multimodal Distance": -12.417572021484375,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.25699689984321594,
      "MoBERT-F": 0.7387960799929476,
      "MoBERT-N": 0.6564628530944062,
      "MoBERT-min(F/N)": 0.6564628530944062,
      "MoBERT-max(F/N)": 0.7387960799929476,
      "MotionCritic": -6.1718525886535645,
      "VeMo (human-opt view)": 0.0004880429477794046,
      "VeMo (max entropy view)": 0.0004880429477794046,
      "VeMo (min entropy view)": 4.684496484887681e-05,
      "VeMo (random view)": 0.0004880429477794046,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is swinging a tennis racket."
  },
  "000421": {
    "text": "a person slowly walks down some stairs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6152366317201899,
      "Minus Multimodal Distance": -6.55496072769165,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0002653496921993792,
      "MoBERT-F": 0.5576691174419247,
      "MoBERT-N": 0.6076076293762525,
      "MoBERT-min(F/N)": 0.5576691174419247,
      "MoBERT-max(F/N)": 0.6076076293762525,
      "MotionCritic": -6.873056888580322,
      "VeMo (human-opt view)": 0.04751001717229536,
      "VeMo (max entropy view)": 0.053469852104664393,
      "VeMo (min entropy view)": 0.04751001717229536,
      "VeMo (random view)": 0.053469852104664393,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly walks down some stairs."
  },
  "003116": {
    "text": "person walks slightly to the right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.535823772052967,
      "Minus Multimodal Distance": -3.048299551010132,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.2901203162036836e-05,
      "MoBERT-F": 0.3364729814934858,
      "MoBERT-N": 0.4495878529901599,
      "MoBERT-min(F/N)": 0.3364729814934858,
      "MoBERT-max(F/N)": 0.4495878529901599,
      "MotionCritic": -1.903448224067688,
      "VeMo (human-opt view)": 0.9578713968957872,
      "VeMo (max entropy view)": 0.9578713968957872,
      "VeMo (min entropy view)": 0.9578713968957872,
      "VeMo (random view)": 0.9578713968957872,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks slightly to the right."
  },
  "004008": {
    "text": "a person walks quickly in a diagonal direction for 8 steps and then turns to walk in the direction they came from.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9267940688369852,
      "Minus Multimodal Distance": -8.126280784606934,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8131588101387024,
      "MoBERT-F": 0.6543682632642138,
      "MoBERT-N": 0.6359880795013105,
      "MoBERT-min(F/N)": 0.6359880795013105,
      "MoBERT-max(F/N)": 0.6543682632642138,
      "MotionCritic": -16.76272964477539,
      "VeMo (human-opt view)": 0.8349900596421471,
      "VeMo (max entropy view)": 0.7984570877531341,
      "VeMo (min entropy view)": 0.8349900596421471,
      "VeMo (random view)": 0.8349900596421471,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks quickly in a diagonal direction for 8 steps and then turns to walk in the direction they came from."
  },
  "005039": {
    "text": "a person walks forward but slowly curves towards the left as they are walking and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.26984014714347243,
      "Minus Multimodal Distance": -7.4712324142456055,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.6017070417292416e-05,
      "MoBERT-F": 0.3428428340875711,
      "MoBERT-N": 0.4429475856374442,
      "MoBERT-min(F/N)": 0.3428428340875711,
      "MoBERT-max(F/N)": 0.4429475856374442,
      "MotionCritic": -10.902994155883789,
      "VeMo (human-opt view)": 0.9796376913354866,
      "VeMo (max entropy view)": 0.9722849517552864,
      "VeMo (min entropy view)": 0.9796376913354866,
      "VeMo (random view)": 0.9722849517552864,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward but slowly curves to the left as they walk, and then stops."
  },
  "006514": {
    "text": "a person puts their hands on their knee, then gets up and walks towards the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9466792221566269,
      "Minus Multimodal Distance": -2.444772481918335,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.16051173210144043,
      "MoBERT-F": 0.625938466207416,
      "MoBERT-N": 0.4904351248156229,
      "MoBERT-min(F/N)": 0.4904351248156229,
      "MoBERT-max(F/N)": 0.625938466207416,
      "MotionCritic": -5.194709300994873,
      "VeMo (human-opt view)": 0.964824120603015,
      "VeMo (max entropy view)": 0.8991248272685398,
      "VeMo (min entropy view)": 0.964824120603015,
      "VeMo (random view)": 0.964824120603015,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person puts their hands on their knees, then gets up and walks towards the right."
  },
  "004817": {
    "text": "stick man walking straightforward in wobbling position maybe taking a dui test.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.681117528758702,
      "Minus Multimodal Distance": -10.925420761108398,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9663011446245946e-05,
      "MoBERT-F": 0.3410231162282208,
      "MoBERT-N": 0.5050183436747832,
      "MoBERT-min(F/N)": 0.3410231162282208,
      "MoBERT-max(F/N)": 0.5050183436747832,
      "MotionCritic": -2.113494396209717,
      "VeMo (human-opt view)": 0.8736532810969637,
      "VeMo (max entropy view)": 0.8736532810969637,
      "VeMo (min entropy view)": 0.8741965105601469,
      "VeMo (random view)": 0.8741965105601469,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking straightforward in a wobbling position, perhaps taking a DUI test."
  },
  "013088": {
    "text": "the person picks up the bottle of shampoo to wash the dog",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6225220519746129,
      "Minus Multimodal Distance": -9.645506858825684,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.215999458916485e-05,
      "MoBERT-F": 0.2907253605582963,
      "MoBERT-N": 0.3746876386864291,
      "MoBERT-min(F/N)": 0.2907253605582963,
      "MoBERT-max(F/N)": 0.3746876386864291,
      "MotionCritic": 1.2671934366226196,
      "VeMo (human-opt view)": 1.2221010403470654e-05,
      "VeMo (max entropy view)": 1.260120071441093e-05,
      "VeMo (min entropy view)": 1.2221010403470654e-05,
      "VeMo (random view)": 1.260120071441093e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person picks up the bottle of shampoo to wash the dog."
  },
  "003373": {
    "text": "the stick figure is walking in form of a back wards letter j.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1439179619781934,
      "Minus Multimodal Distance": -10.30746078491211,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9896597266197205,
      "MoBERT-F": 0.6643843979758923,
      "MoBERT-N": 0.6488314413084981,
      "MoBERT-min(F/N)": 0.6488314413084981,
      "MoBERT-max(F/N)": 0.6643843979758923,
      "MotionCritic": -6.664418697357178,
      "VeMo (human-opt view)": 0.7056179775280899,
      "VeMo (max entropy view)": 0.7056179775280899,
      "VeMo (min entropy view)": 0.7875816993464052,
      "VeMo (random view)": 0.7875816993464052,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The stick figure is walking in the form of a backwards letter J."
  },
  "004517": {
    "text": "a person standing points at something by lifting their right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.21149175530583697,
      "Minus Multimodal Distance": -3.792241334915161,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.8472225898876786e-05,
      "MoBERT-F": 0.3074074343180993,
      "MoBERT-N": 0.34817664863287956,
      "MoBERT-min(F/N)": 0.3074074343180993,
      "MoBERT-max(F/N)": 0.34817664863287956,
      "MotionCritic": -6.4066290855407715,
      "VeMo (human-opt view)": 0.9808623838640514,
      "VeMo (max entropy view)": 0.9808623838640514,
      "VeMo (min entropy view)": 0.9859062602425435,
      "VeMo (random view)": 0.9808623838640514,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing points at something by lifting their right hand."
  },
  "014472": {
    "text": "a person dances briefly and then performs a backflip.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4925808536099208,
      "Minus Multimodal Distance": -8.370857238769531,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9969131946563721,
      "MoBERT-F": 0.8130475472159395,
      "MoBERT-N": 0.7787972860974499,
      "MoBERT-min(F/N)": 0.7787972860974499,
      "MoBERT-max(F/N)": 0.8130475472159395,
      "MotionCritic": -8.137338638305664,
      "VeMo (human-opt view)": 0.9580326116011761,
      "VeMo (max entropy view)": 0.9283135636926251,
      "VeMo (min entropy view)": 0.9580326116011761,
      "VeMo (random view)": 0.9283135636926251,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person briefly dances and then performs a backflip."
  },
  "008556": {
    "text": "a person gets down and crawls around the floor.a",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9710115231375575,
      "Minus Multimodal Distance": -3.705303430557251,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9673385620117188,
      "MoBERT-F": 0.766178144843946,
      "MoBERT-N": 0.7565671269215474,
      "MoBERT-min(F/N)": 0.7565671269215474,
      "MoBERT-max(F/N)": 0.766178144843946,
      "MotionCritic": -7.125477313995361,
      "VeMo (human-opt view)": 0.9987570777516918,
      "VeMo (max entropy view)": 0.9978226391763663,
      "VeMo (min entropy view)": 0.9987570777516918,
      "VeMo (random view)": 0.9987570777516918,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person gets down and crawls around the floor."
  },
  "004615": {
    "text": "someone is sliding an object to the left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9501687807311292,
      "Minus Multimodal Distance": -5.580944061279297,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4976947315735742e-05,
      "MoBERT-F": 0.32718722673784045,
      "MoBERT-N": 0.4625236848996758,
      "MoBERT-min(F/N)": 0.32718722673784045,
      "MoBERT-max(F/N)": 0.4625236848996758,
      "MotionCritic": -7.271498680114746,
      "VeMo (human-opt view)": 0.04719894133215704,
      "VeMo (max entropy view)": 0.1260014566642389,
      "VeMo (min entropy view)": 0.04719894133215704,
      "VeMo (random view)": 0.04719894133215704,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is sliding an object to the left."
  },
  "004364": {
    "text": "the person is wiggling his whole body.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6877791491161331,
      "Minus Multimodal Distance": -6.057145595550537,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.636987796402536e-05,
      "MoBERT-F": 0.5270098476399846,
      "MoBERT-N": 0.5487843207882479,
      "MoBERT-min(F/N)": 0.5270098476399846,
      "MoBERT-max(F/N)": 0.5487843207882479,
      "MotionCritic": -2.772205352783203,
      "VeMo (human-opt view)": 0.48375451263537905,
      "VeMo (max entropy view)": 0.48375451263537905,
      "VeMo (min entropy view)": 0.6513761467889908,
      "VeMo (random view)": 0.48375451263537905,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is wiggling his whole body."
  },
  "004776": {
    "text": "a man moving like a chicken.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4115183212108012,
      "Minus Multimodal Distance": -8.686752319335938,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.829496588674374e-05,
      "MoBERT-F": 0.44335558170215417,
      "MoBERT-N": 0.46175469864877944,
      "MoBERT-min(F/N)": 0.44335558170215417,
      "MoBERT-max(F/N)": 0.46175469864877944,
      "MotionCritic": -7.7492804527282715,
      "VeMo (human-opt view)": 0.002325727575586146,
      "VeMo (max entropy view)": 0.02158273381294964,
      "VeMo (min entropy view)": 0.002325727575586146,
      "VeMo (random view)": 0.002325727575586146,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is moving like a chicken."
  },
  "007355": {
    "text": "the person extending their left leg.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6610193146583634,
      "Minus Multimodal Distance": -9.846783638000488,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.057057620026171e-05,
      "MoBERT-F": 0.4605078277172401,
      "MoBERT-N": 0.4608330770430635,
      "MoBERT-min(F/N)": 0.4605078277172401,
      "MoBERT-max(F/N)": 0.4608330770430635,
      "MotionCritic": -7.502195358276367,
      "VeMo (human-opt view)": 0.9552238805970149,
      "VeMo (max entropy view)": 0.9465824238943136,
      "VeMo (min entropy view)": 0.9552238805970149,
      "VeMo (random view)": 0.9465824238943136,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is extending their left leg."
  },
  "013747": {
    "text": "a figure leans on its left leg, spine arched in an agressive posture slightly swaying its shoulders",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.737193925682307,
      "Minus Multimodal Distance": -4.168684005737305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.206706216791645e-05,
      "MoBERT-F": 0.512190992359742,
      "MoBERT-N": 0.5008681315580878,
      "MoBERT-min(F/N)": 0.5008681315580878,
      "MoBERT-max(F/N)": 0.512190992359742,
      "MotionCritic": -12.271512985229492,
      "VeMo (human-opt view)": 0.8355263157894737,
      "VeMo (max entropy view)": 0.8355263157894737,
      "VeMo (min entropy view)": 0.8436724565756824,
      "VeMo (random view)": 0.8355263157894737,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure leans on its left leg, with its spine arched in an aggressive posture, slightly swaying its shoulders."
  },
  "006215": {
    "text": "a figure seems to gesture contritely or sincerely with their left hand as though entreating someone",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.171576405281336,
      "Minus Multimodal Distance": -7.349857330322266,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4832792152883485e-05,
      "MoBERT-F": 0.31414703877269334,
      "MoBERT-N": 0.4005114669242901,
      "MoBERT-min(F/N)": 0.31414703877269334,
      "MoBERT-max(F/N)": 0.4005114669242901,
      "MotionCritic": -8.866138458251953,
      "VeMo (human-opt view)": 0.5921052631578947,
      "VeMo (max entropy view)": 0.5921052631578947,
      "VeMo (min entropy view)": 0.7431906614785992,
      "VeMo (random view)": 0.5921052631578947,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person seems to gesture contritely or sincerely with their left hand as though entreating someone."
  },
  "011935": {
    "text": "a person walks down a hill and places a box on the ground.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6353383464153697,
      "Minus Multimodal Distance": -3.1165904998779297,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.398985634499695e-05,
      "MoBERT-F": 0.3576477458015015,
      "MoBERT-N": 0.4342251420667394,
      "MoBERT-min(F/N)": 0.3576477458015015,
      "MoBERT-max(F/N)": 0.4342251420667394,
      "MotionCritic": -5.154544353485107,
      "VeMo (human-opt view)": 2.9312077660141315e-05,
      "VeMo (max entropy view)": 5.843450076808905e-05,
      "VeMo (min entropy view)": 2.9312077660141315e-05,
      "VeMo (random view)": 5.843450076808905e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks down a hill and places a box on the ground."
  },
  "007644": {
    "text": "a person walks in a left diagonal then stops with hands slightly raised.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9767523281336435,
      "Minus Multimodal Distance": -5.452387809753418,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.258756234776229e-05,
      "MoBERT-F": 0.4555217123239049,
      "MoBERT-N": 0.5251585829184736,
      "MoBERT-min(F/N)": 0.4555217123239049,
      "MoBERT-max(F/N)": 0.5251585829184736,
      "MotionCritic": -14.296934127807617,
      "VeMo (human-opt view)": 0.9687600644122383,
      "VeMo (max entropy view)": 0.9045473166734944,
      "VeMo (min entropy view)": 0.9687600644122383,
      "VeMo (random view)": 0.9045473166734944,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks diagonally to the left and then stops with their hands slightly raised."
  },
  "000820": {
    "text": "using his left hand he smacks his right arm bicep.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.49175747387087604,
      "Minus Multimodal Distance": -7.480032444000244,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.6256107200169936e-05,
      "MoBERT-F": 0.2720378388698186,
      "MoBERT-N": 0.37067117450205933,
      "MoBERT-min(F/N)": 0.2720378388698186,
      "MoBERT-max(F/N)": 0.37067117450205933,
      "MotionCritic": -7.700135707855225,
      "VeMo (human-opt view)": 0.3486088379705401,
      "VeMo (max entropy view)": 0.43765281173594134,
      "VeMo (min entropy view)": 0.3486088379705401,
      "VeMo (random view)": 0.43765281173594134,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Using his left hand, he smacks the bicep of his right arm."
  },
  "006314": {
    "text": "this person walks slowly forward in a zig zag.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.32359307612970867,
      "Minus Multimodal Distance": -2.6481704711914062,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4460876375087537e-05,
      "MoBERT-F": 0.40353592171958935,
      "MoBERT-N": 0.4675249606512216,
      "MoBERT-min(F/N)": 0.40353592171958935,
      "MoBERT-max(F/N)": 0.4675249606512216,
      "MotionCritic": -1.6709171533584595,
      "VeMo (human-opt view)": 0.5927342256214149,
      "VeMo (max entropy view)": 0.5927342256214149,
      "VeMo (min entropy view)": 0.6519174041297935,
      "VeMo (random view)": 0.6519174041297935,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person walks slowly forward in a zig - zag."
  },
  "008888": {
    "text": "person is doing across punch to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.651666472692848,
      "Minus Multimodal Distance": -3.548804521560669,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.362165105296299e-05,
      "MoBERT-F": 0.30584364824404564,
      "MoBERT-N": 0.43719374312290543,
      "MoBERT-min(F/N)": 0.30584364824404564,
      "MoBERT-max(F/N)": 0.43719374312290543,
      "MotionCritic": -4.4731035232543945,
      "VeMo (human-opt view)": 0.0010362694300518134,
      "VeMo (max entropy view)": 0.0014093013891685123,
      "VeMo (min entropy view)": 0.0010362694300518134,
      "VeMo (random view)": 0.0014093013891685123,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is doing a cross punch to the right."
  },
  "003119": {
    "text": "a person standing and acting like a chicken.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5160961867337679,
      "Minus Multimodal Distance": -4.654005527496338,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.199254933861084e-05,
      "MoBERT-F": 0.36872378902702363,
      "MoBERT-N": 0.43616288011206017,
      "MoBERT-min(F/N)": 0.36872378902702363,
      "MoBERT-max(F/N)": 0.43616288011206017,
      "MotionCritic": -4.90814733505249,
      "VeMo (human-opt view)": 0.0026319434824767975,
      "VeMo (max entropy view)": 0.02596849723286505,
      "VeMo (min entropy view)": 0.0026319434824767975,
      "VeMo (random view)": 0.0026319434824767975,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing and acting like a chicken."
  },
  "012321": {
    "text": "a person throws something and catches something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6373180020561879,
      "Minus Multimodal Distance": -3.7129790782928467,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00032590061891824007,
      "MoBERT-F": 0.44578068729756626,
      "MoBERT-N": 0.5132245931665473,
      "MoBERT-min(F/N)": 0.44578068729756626,
      "MoBERT-max(F/N)": 0.5132245931665473,
      "MotionCritic": -4.782205104827881,
      "VeMo (human-opt view)": 0.3626707132018209,
      "VeMo (max entropy view)": 0.5769230769230769,
      "VeMo (min entropy view)": 0.3626707132018209,
      "VeMo (random view)": 0.3626707132018209,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person throws something and catches something."
  },
  "000091": {
    "text": "a person lays on the ground.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.397083042285523,
      "Minus Multimodal Distance": -2.668088912963867,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7224506139755249,
      "MoBERT-F": 0.6416930798762979,
      "MoBERT-N": 0.4863606474275778,
      "MoBERT-min(F/N)": 0.4863606474275778,
      "MoBERT-max(F/N)": 0.6416930798762979,
      "MotionCritic": -1.3227862119674683,
      "VeMo (human-opt view)": 0.9941032647777938,
      "VeMo (max entropy view)": 0.9941032647777938,
      "VeMo (min entropy view)": 0.9944465226023652,
      "VeMo (random view)": 0.9944465226023652,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lies on the ground."
  },
  "007354": {
    "text": "he starts to crawl a lot",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1351463144672855,
      "Minus Multimodal Distance": -12.520231246948242,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9840199947357178,
      "MoBERT-F": 0.7395908947508965,
      "MoBERT-N": 0.7530126312650627,
      "MoBERT-min(F/N)": 0.7395908947508965,
      "MoBERT-max(F/N)": 0.7530126312650627,
      "MotionCritic": -10.559309005737305,
      "VeMo (human-opt view)": 0.9840662842574889,
      "VeMo (max entropy view)": 0.9723435225618632,
      "VeMo (min entropy view)": 0.9840662842574889,
      "VeMo (random view)": 0.9840662842574889,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person starts to crawl a lot."
  },
  "009972": {
    "text": "a person is doing jumping jacks.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5628626773017655,
      "Minus Multimodal Distance": -2.914975166320801,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9650976061820984,
      "MoBERT-F": 0.8303874369624694,
      "MoBERT-N": 0.8053625961769196,
      "MoBERT-min(F/N)": 0.8053625961769196,
      "MoBERT-max(F/N)": 0.8303874369624694,
      "MotionCritic": -4.337634086608887,
      "VeMo (human-opt view)": 0.9525200876552228,
      "VeMo (max entropy view)": 0.9433314253005152,
      "VeMo (min entropy view)": 0.9525200876552228,
      "VeMo (random view)": 0.9433314253005152,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is doing jumping jacks."
  },
  "011257": {
    "text": "a person throws something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6360092943155469,
      "Minus Multimodal Distance": -10.57931900024414,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.07241775840520859,
      "MoBERT-F": 0.6463812311200436,
      "MoBERT-N": 0.6323940250039012,
      "MoBERT-min(F/N)": 0.6323940250039012,
      "MoBERT-max(F/N)": 0.6463812311200436,
      "MotionCritic": -7.9222731590271,
      "VeMo (human-opt view)": 0.9465400271370421,
      "VeMo (max entropy view)": 0.9465400271370421,
      "VeMo (min entropy view)": 0.9604365620736699,
      "VeMo (random view)": 0.9604365620736699,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws something."
  },
  "008388": {
    "text": "[from 0s to 5s] a person performs a ballerina balance pose, first on their left leg, before centralising and swapping to their right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6781765946135194,
      "Minus Multimodal Distance": -3.5728976726531982,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.053653303533792496,
      "MoBERT-F": 0.6718562491425003,
      "MoBERT-N": 0.5825889711284897,
      "MoBERT-min(F/N)": 0.5825889711284897,
      "MoBERT-max(F/N)": 0.6718562491425003,
      "MotionCritic": -7.393910884857178,
      "VeMo (human-opt view)": 0.778021978021978,
      "VeMo (max entropy view)": 0.6520376175548589,
      "VeMo (min entropy view)": 0.778021978021978,
      "VeMo (random view)": 0.778021978021978,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person performs a ballerina balance pose, first on their left leg, before centralizing and swapping to their right."
  },
  "008009": {
    "text": "a person picks something up with his right hand and bring it up to his face",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.13624132484639545,
      "Minus Multimodal Distance": -4.856091499328613,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.453359360923059e-05,
      "MoBERT-F": 0.2670146396889333,
      "MoBERT-N": 0.3464228083870282,
      "MoBERT-min(F/N)": 0.2670146396889333,
      "MoBERT-max(F/N)": 0.3464228083870282,
      "MotionCritic": -7.129283428192139,
      "VeMo (human-opt view)": 0.3768545994065282,
      "VeMo (max entropy view)": 0.5314285714285715,
      "VeMo (min entropy view)": 0.3768545994065282,
      "VeMo (random view)": 0.3768545994065282,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks something up with his right hand and brings it up to his face."
  },
  "005676": {
    "text": "a man continuing bending forward at the waist with his arms dangling in front of him.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4984759281455494,
      "Minus Multimodal Distance": -9.891194343566895,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.001321192947216332,
      "MoBERT-F": 0.5355847875676292,
      "MoBERT-N": 0.44107646907434256,
      "MoBERT-min(F/N)": 0.44107646907434256,
      "MoBERT-max(F/N)": 0.5355847875676292,
      "MotionCritic": -3.116424083709717,
      "VeMo (human-opt view)": 0.6924034869240349,
      "VeMo (max entropy view)": 0.6924034869240349,
      "VeMo (min entropy view)": 0.7769230769230769,
      "VeMo (random view)": 0.6924034869240349,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man continues to bend forward at the waist with his arms dangling in front of him."
  },
  "014192": {
    "text": "a person walks forward and then appears to bump into something, then continues walking forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6132221016255841,
      "Minus Multimodal Distance": -7.614607810974121,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0003247025888413191,
      "MoBERT-F": 0.4496257207845693,
      "MoBERT-N": 0.514545135136469,
      "MoBERT-min(F/N)": 0.4496257207845693,
      "MoBERT-max(F/N)": 0.514545135136469,
      "MotionCritic": -0.21197167038917542,
      "VeMo (human-opt view)": 0.7181818181818181,
      "VeMo (max entropy view)": 0.7181818181818181,
      "VeMo (min entropy view)": 0.7541401273885351,
      "VeMo (random view)": 0.7181818181818181,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, then appears to bump into something, and then continues walking forward."
  },
  "004973": {
    "text": "a person does one squat with arms straight out in front.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4791526093419721,
      "Minus Multimodal Distance": -7.718827724456787,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.871946293860674e-05,
      "MoBERT-F": 0.4065126904898362,
      "MoBERT-N": 0.467548214114113,
      "MoBERT-min(F/N)": 0.4065126904898362,
      "MoBERT-max(F/N)": 0.467548214114113,
      "MotionCritic": -6.653318881988525,
      "VeMo (human-opt view)": 0.9363077917879081,
      "VeMo (max entropy view)": 0.8739495798319328,
      "VeMo (min entropy view)": 0.9363077917879081,
      "VeMo (random view)": 0.9363077917879081,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person does one squat with their arms straight out in front."
  },
  "004947": {
    "text": "a person sits down then uses left arm to push up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7770199947976478,
      "Minus Multimodal Distance": -12.271143913269043,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.011809219606220722,
      "MoBERT-F": 0.40353524830041054,
      "MoBERT-N": 0.42572059127673034,
      "MoBERT-min(F/N)": 0.40353524830041054,
      "MoBERT-max(F/N)": 0.42572059127673034,
      "MotionCritic": -6.102517604827881,
      "VeMo (human-opt view)": 0.9467107680999632,
      "VeMo (max entropy view)": 0.836027713625866,
      "VeMo (min entropy view)": 0.9467107680999632,
      "VeMo (random view)": 0.836027713625866,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits down and then uses the left arm to push up."
  },
  "002272": {
    "text": "someone getting into position to start catching and tossing an object.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5781015855584704,
      "Minus Multimodal Distance": -10.305081367492676,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3899017833173275e-05,
      "MoBERT-F": 0.42484977254443024,
      "MoBERT-N": 0.45592402808193805,
      "MoBERT-min(F/N)": 0.42484977254443024,
      "MoBERT-max(F/N)": 0.45592402808193805,
      "MotionCritic": -7.60750150680542,
      "VeMo (human-opt view)": 0.8083333333333333,
      "VeMo (max entropy view)": 0.8083333333333333,
      "VeMo (min entropy view)": 0.859541130386122,
      "VeMo (random view)": 0.8083333333333333,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is getting into position to start catching and tossing an object."
  },
  "011189": {
    "text": "a person walking down and taking a left turn",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.46414623175395653,
      "Minus Multimodal Distance": -9.154570579528809,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.554628372308798e-05,
      "MoBERT-F": 0.33556535845678315,
      "MoBERT-N": 0.482724184746115,
      "MoBERT-min(F/N)": 0.33556535845678315,
      "MoBERT-max(F/N)": 0.482724184746115,
      "MotionCritic": -9.406278610229492,
      "VeMo (human-opt view)": 0.8933500627352572,
      "VeMo (max entropy view)": 0.8356545961002786,
      "VeMo (min entropy view)": 0.8933500627352572,
      "VeMo (random view)": 0.8356545961002786,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking down and takes a left turn."
  },
  "009302": {
    "text": "person is running in place and then stands still",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.47246045254941565,
      "Minus Multimodal Distance": -8.438961029052734,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.59609951172024e-05,
      "MoBERT-F": 0.47339588172832425,
      "MoBERT-N": 0.5979547264895482,
      "MoBERT-min(F/N)": 0.47339588172832425,
      "MoBERT-max(F/N)": 0.5979547264895482,
      "MotionCritic": -5.205219268798828,
      "VeMo (human-opt view)": 0.9724510082362965,
      "VeMo (max entropy view)": 0.7047244094488189,
      "VeMo (min entropy view)": 0.9724510082362965,
      "VeMo (random view)": 0.9724510082362965,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is running in place and then stands still."
  },
  "006006": {
    "text": "a figure puts two objects together from opposite positions.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3408602287709004,
      "Minus Multimodal Distance": -11.179728507995605,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00040516609442420304,
      "MoBERT-F": 0.5067138867802343,
      "MoBERT-N": 0.5003729102497264,
      "MoBERT-min(F/N)": 0.5003729102497264,
      "MoBERT-max(F/N)": 0.5067138867802343,
      "MotionCritic": -1.8055931329727173,
      "VeMo (human-opt view)": 1.4374014965339828e-05,
      "VeMo (max entropy view)": 0.00011984985680515575,
      "VeMo (min entropy view)": 1.4374014965339828e-05,
      "VeMo (random view)": 0.00011984985680515575,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person puts two objects together from opposite positions."
  },
  "012798": {
    "text": "a person holds something above, and swings.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.032781086277775,
      "Minus Multimodal Distance": -12.238699913024902,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9377316832542419,
      "MoBERT-F": 0.7549743561035183,
      "MoBERT-N": 0.6592530691112933,
      "MoBERT-min(F/N)": 0.6592530691112933,
      "MoBERT-max(F/N)": 0.7549743561035183,
      "MotionCritic": -13.047422409057617,
      "VeMo (human-opt view)": 0.8804283164782868,
      "VeMo (max entropy view)": 0.8079350766456267,
      "VeMo (min entropy view)": 0.8804283164782868,
      "VeMo (random view)": 0.8804283164782868,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds something above and swings it."
  },
  "014554": {
    "text": "a person who seems to slap something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.31347178803563247,
      "Minus Multimodal Distance": -3.8928067684173584,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.129184551653452e-05,
      "MoBERT-F": 0.3648191685503396,
      "MoBERT-N": 0.42962933459094416,
      "MoBERT-min(F/N)": 0.3648191685503396,
      "MoBERT-max(F/N)": 0.42962933459094416,
      "MotionCritic": -9.404929161071777,
      "VeMo (human-opt view)": 0.9327046720960137,
      "VeMo (max entropy view)": 0.8746465598491989,
      "VeMo (min entropy view)": 0.9327046720960137,
      "VeMo (random view)": 0.8746465598491989,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who seems to slap something."
  },
  "011793": {
    "text": "figure seen walking in place lazily.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4137518612188992,
      "Minus Multimodal Distance": -12.846464157104492,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.600569678179454e-05,
      "MoBERT-F": 0.42089927838711716,
      "MoBERT-N": 0.4662509588261005,
      "MoBERT-min(F/N)": 0.42089927838711716,
      "MoBERT-max(F/N)": 0.4662509588261005,
      "MotionCritic": -7.617130756378174,
      "VeMo (human-opt view)": 0.8806539509536785,
      "VeMo (max entropy view)": 0.8171673819742489,
      "VeMo (min entropy view)": 0.8806539509536785,
      "VeMo (random view)": 0.8806539509536785,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Figure seen walking in place lazily."
  },
  "011743": {
    "text": "turning body from side to side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.40505244206361474,
      "Minus Multimodal Distance": -2.9672086238861084,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2453052224591374e-05,
      "MoBERT-F": 0.35357409314294314,
      "MoBERT-N": 0.47389430991683434,
      "MoBERT-min(F/N)": 0.35357409314294314,
      "MoBERT-max(F/N)": 0.47389430991683434,
      "MotionCritic": -3.9889073371887207,
      "VeMo (human-opt view)": 0.7545304777594728,
      "VeMo (max entropy view)": 0.718475073313783,
      "VeMo (min entropy view)": 0.7545304777594728,
      "VeMo (random view)": 0.7545304777594728,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is turning their body from side to side."
  },
  "008739": {
    "text": "a person jumps forward once.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3271317955786006,
      "Minus Multimodal Distance": -9.277848243713379,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.030301181599497795,
      "MoBERT-F": 0.6669887703229056,
      "MoBERT-N": 0.6387858564237983,
      "MoBERT-min(F/N)": 0.6387858564237983,
      "MoBERT-max(F/N)": 0.6669887703229056,
      "MotionCritic": -8.244087219238281,
      "VeMo (human-opt view)": 0.7054108216432866,
      "VeMo (max entropy view)": 0.7054108216432866,
      "VeMo (min entropy view)": 0.08484848484848485,
      "VeMo (random view)": 0.7054108216432866,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps forward once."
  },
  "008822": {
    "text": "a person walks forward slowly without moving forward, as though walking on a treadmill, and his arms remain still at his sides.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.36772858093979965,
      "Minus Multimodal Distance": -9.042387008666992,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4803788619465195e-05,
      "MoBERT-F": 0.3143860407743312,
      "MoBERT-N": 0.42446907220008473,
      "MoBERT-min(F/N)": 0.3143860407743312,
      "MoBERT-max(F/N)": 0.42446907220008473,
      "MotionCritic": -8.019881248474121,
      "VeMo (human-opt view)": 0.6372360844529751,
      "VeMo (max entropy view)": 0.6228287841191067,
      "VeMo (min entropy view)": 0.6372360844529751,
      "VeMo (random view)": 0.6228287841191067,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward slowly but doesn't actually move forward, as if walking on a treadmill, and his arms remain stationary at his sides."
  },
  "003596": {
    "text": "a person squats down to the ground, picks up a box, then stands back up, and places the box on a higher surface.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6017784825741185,
      "Minus Multimodal Distance": -8.842459678649902,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3409973382949829,
      "MoBERT-F": 0.4415615690276976,
      "MoBERT-N": 0.4516677420852009,
      "MoBERT-min(F/N)": 0.4415615690276976,
      "MoBERT-max(F/N)": 0.4516677420852009,
      "MotionCritic": -2.7027835845947266,
      "VeMo (human-opt view)": 0.0008035816783377339,
      "VeMo (max entropy view)": 0.003163786809442687,
      "VeMo (min entropy view)": 0.0008035816783377339,
      "VeMo (random view)": 0.003163786809442687,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats down to the ground, picks up a box, then stands back up and places the box on a higher surface."
  },
  "003997": {
    "text": "a person bounces up and down on their toes with their arms bouncing along loosely by their sides.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4663428962957015,
      "Minus Multimodal Distance": -8.593592643737793,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0005184793844819069,
      "MoBERT-F": 0.5476868695599597,
      "MoBERT-N": 0.6516207825930811,
      "MoBERT-min(F/N)": 0.5476868695599597,
      "MoBERT-max(F/N)": 0.6516207825930811,
      "MotionCritic": -8.010883331298828,
      "VeMo (human-opt view)": 0.5921787709497207,
      "VeMo (max entropy view)": 0.5921787709497207,
      "VeMo (min entropy view)": 0.36259541984732824,
      "VeMo (random view)": 0.36259541984732824,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person bounces up and down on their toes, with their arms bouncing loosely by their sides."
  },
  "004602": {
    "text": "person appears to be running in straight line then jumps over something and continues running.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8316452178494139,
      "Minus Multimodal Distance": -9.874823570251465,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9980618357658386,
      "MoBERT-F": 0.7945279532469411,
      "MoBERT-N": 0.793628326237025,
      "MoBERT-min(F/N)": 0.793628326237025,
      "MoBERT-max(F/N)": 0.7945279532469411,
      "MotionCritic": -8.059093475341797,
      "VeMo (human-opt view)": 0.6794425087108014,
      "VeMo (max entropy view)": 0.5938697318007663,
      "VeMo (min entropy view)": 0.6794425087108014,
      "VeMo (random view)": 0.6794425087108014,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person appears to be running in a straight line, then jumps over something and continues running."
  },
  "007063": {
    "text": "a person in a t shape, bends down to the right and back up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9757841878903697,
      "Minus Multimodal Distance": -2.906797409057617,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5944162607192993,
      "MoBERT-F": 0.6062105259619814,
      "MoBERT-N": 0.5821387557926107,
      "MoBERT-min(F/N)": 0.5821387557926107,
      "MoBERT-max(F/N)": 0.6062105259619814,
      "MotionCritic": -6.402656078338623,
      "VeMo (human-opt view)": 0.9243840271877655,
      "VeMo (max entropy view)": 0.9243840271877655,
      "VeMo (min entropy view)": 0.9553999262808699,
      "VeMo (random view)": 0.9553999262808699,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person in a T - shape bends down to the right and then straightens back up."
  },
  "000792": {
    "text": "a person imitates biting into something then waves their right hand around randomly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.32687879818747684,
      "Minus Multimodal Distance": -8.834466934204102,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.330304313451052e-05,
      "MoBERT-F": 0.27706903632453656,
      "MoBERT-N": 0.32078863537907554,
      "MoBERT-min(F/N)": 0.27706903632453656,
      "MoBERT-max(F/N)": 0.32078863537907554,
      "MotionCritic": -1.1920242309570312,
      "VeMo (human-opt view)": 0.46814404432132967,
      "VeMo (max entropy view)": 0.46814404432132967,
      "VeMo (min entropy view)": 0.014951627088830254,
      "VeMo (random view)": 0.014951627088830254,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person imitates biting into something and then waves their right hand around randomly."
  },
  "010195": {
    "text": "a person, slowly walked forward, and after made circle with right hand",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8449881062508128,
      "Minus Multimodal Distance": -4.075040340423584,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00020970763580407947,
      "MoBERT-F": 0.41372688898584203,
      "MoBERT-N": 0.5491348974166832,
      "MoBERT-min(F/N)": 0.41372688898584203,
      "MoBERT-max(F/N)": 0.5491348974166832,
      "MotionCritic": -7.577911853790283,
      "VeMo (human-opt view)": 0.4849624060150376,
      "VeMo (max entropy view)": 0.4849624060150376,
      "VeMo (min entropy view)": 0.6925566343042071,
      "VeMo (random view)": 0.4849624060150376,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person slowly walked forward and then made a circle with the right hand."
  },
  "002662": {
    "text": "a man is crab walking backwards and then rolls to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5612027408013271,
      "Minus Multimodal Distance": -3.649892568588257,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.08211731910705566,
      "MoBERT-F": 0.47997429547498804,
      "MoBERT-N": 0.418535723775313,
      "MoBERT-min(F/N)": 0.418535723775313,
      "MoBERT-max(F/N)": 0.47997429547498804,
      "MotionCritic": -4.700406551361084,
      "VeMo (human-opt view)": 0.5610561056105611,
      "VeMo (max entropy view)": 0.5610561056105611,
      "VeMo (min entropy view)": 0.6081081081081081,
      "VeMo (random view)": 0.5610561056105611,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man is crab - walking backwards and then rolls to the left."
  },
  "014389": {
    "text": "a person picks something up, rubs it and puts it back down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8029652750483461,
      "Minus Multimodal Distance": -5.917111873626709,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3873049940448254e-05,
      "MoBERT-F": 0.32826494530387096,
      "MoBERT-N": 0.4475394268384611,
      "MoBERT-min(F/N)": 0.32826494530387096,
      "MoBERT-max(F/N)": 0.4475394268384611,
      "MotionCritic": -5.0548930168151855,
      "VeMo (human-opt view)": 0.6075949367088608,
      "VeMo (max entropy view)": 0.6075949367088608,
      "VeMo (min entropy view)": 0.10086455331412104,
      "VeMo (random view)": 0.10086455331412104,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks something up, rubs it, and puts it back down."
  },
  "005747": {
    "text": "a man dances rhythmically from side to side and then turns",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.044862590110426,
      "Minus Multimodal Distance": -4.458342552185059,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0024717715568840504,
      "MoBERT-F": 0.651286891834121,
      "MoBERT-N": 0.7158956469883473,
      "MoBERT-min(F/N)": 0.651286891834121,
      "MoBERT-max(F/N)": 0.7158956469883473,
      "MotionCritic": -7.874960422515869,
      "VeMo (human-opt view)": 0.9432206019236736,
      "VeMo (max entropy view)": 0.9281553398058252,
      "VeMo (min entropy view)": 0.9432206019236736,
      "VeMo (random view)": 0.9432206019236736,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man dances rhythmically from side to side and then turns."
  },
  "008801": {
    "text": "a man jumps forward with his arms at his sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3856540361598376,
      "Minus Multimodal Distance": -4.654685974121094,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0075425212271511555,
      "MoBERT-F": 0.6201960476872813,
      "MoBERT-N": 0.6564671379346713,
      "MoBERT-min(F/N)": 0.6201960476872813,
      "MoBERT-max(F/N)": 0.6564671379346713,
      "MotionCritic": -8.16833782196045,
      "VeMo (human-opt view)": 0.7661870503597122,
      "VeMo (max entropy view)": 0.6787003610108303,
      "VeMo (min entropy view)": 0.7661870503597122,
      "VeMo (random view)": 0.6787003610108303,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man jumps forward with his arms at his sides."
  },
  "008084": {
    "text": "a person wlowly walked by making the circle",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0289320866837874,
      "Minus Multimodal Distance": -3.370898723602295,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9676195979118347,
      "MoBERT-F": 0.708250314139906,
      "MoBERT-N": 0.7283087392419667,
      "MoBERT-min(F/N)": 0.708250314139906,
      "MoBERT-max(F/N)": 0.7283087392419667,
      "MotionCritic": -13.843636512756348,
      "VeMo (human-opt view)": 0.21241513094083414,
      "VeMo (max entropy view)": 0.21241513094083414,
      "VeMo (min entropy view)": 0.14809590973201692,
      "VeMo (random view)": 0.21241513094083414,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly walked by, making a circle."
  },
  "006249": {
    "text": "the person goes for a short jog",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.43047038999848425,
      "Minus Multimodal Distance": -4.399424076080322,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.2824725571554154e-05,
      "MoBERT-F": 0.434270925105013,
      "MoBERT-N": 0.5675572557850849,
      "MoBERT-min(F/N)": 0.434270925105013,
      "MoBERT-max(F/N)": 0.5675572557850849,
      "MotionCritic": -8.50693416595459,
      "VeMo (human-opt view)": 0.9924213320294765,
      "VeMo (max entropy view)": 0.9325349301397206,
      "VeMo (min entropy view)": 0.9924213320294765,
      "VeMo (random view)": 0.9325349301397206,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person goes for a short jog."
  },
  "008936": {
    "text": "the person is dribbling a basketball backwards",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5396811195281965,
      "Minus Multimodal Distance": -9.943187713623047,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9906389713287354,
      "MoBERT-F": 0.656959284346311,
      "MoBERT-N": 0.5812663428145973,
      "MoBERT-min(F/N)": 0.5812663428145973,
      "MoBERT-max(F/N)": 0.656959284346311,
      "MotionCritic": -1.2624706029891968,
      "VeMo (human-opt view)": 1.1886234253572961e-05,
      "VeMo (max entropy view)": 3.9032587952840116e-05,
      "VeMo (min entropy view)": 1.1886234253572961e-05,
      "VeMo (random view)": 1.1886234253572961e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is dribbling a basketball backwards."
  },
  "005935": {
    "text": "place items in a line up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.34958449094818383,
      "Minus Multimodal Distance": -6.125981330871582,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.133236532448791e-05,
      "MoBERT-F": 0.40348733331281383,
      "MoBERT-N": 0.5031190409813361,
      "MoBERT-min(F/N)": 0.40348733331281383,
      "MoBERT-max(F/N)": 0.5031190409813361,
      "MotionCritic": -3.286336898803711,
      "VeMo (human-opt view)": 0.00021665151367506247,
      "VeMo (max entropy view)": 0.0011714824890326691,
      "VeMo (min entropy view)": 0.00021665151367506247,
      "VeMo (random view)": 0.00021665151367506247,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person places items in a line up."
  },
  "011491": {
    "text": "a person is running rapidly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6579735887206645,
      "Minus Multimodal Distance": -4.139641761779785,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.06601888686418533,
      "MoBERT-F": 0.704094549120543,
      "MoBERT-N": 0.6555027751744116,
      "MoBERT-min(F/N)": 0.6555027751744116,
      "MoBERT-max(F/N)": 0.704094549120543,
      "MotionCritic": -8.12747573852539,
      "VeMo (human-opt view)": 0.8933200398803589,
      "VeMo (max entropy view)": 0.7317676143386898,
      "VeMo (min entropy view)": 0.8933200398803589,
      "VeMo (random view)": 0.8933200398803589,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is running rapidly."
  },
  "001738": {
    "text": "a person taps his right hand in the air twice. they then make a rowing movement on each side of their body and then outline a cloud infront of them",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4109854834577572,
      "Minus Multimodal Distance": -7.40006685256958,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7686357498168945e-05,
      "MoBERT-F": 0.3475470756480672,
      "MoBERT-N": 0.45870077343093335,
      "MoBERT-min(F/N)": 0.3475470756480672,
      "MoBERT-max(F/N)": 0.45870077343093335,
      "MotionCritic": -5.215689182281494,
      "VeMo (human-opt view)": 0.39295392953929537,
      "VeMo (max entropy view)": 0.39295392953929537,
      "VeMo (min entropy view)": 0.6078431372549019,
      "VeMo (random view)": 0.6078431372549019,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person taps his right hand in the air twice. Then, he makes a rowing movement on each side of his body and then outlines a cloud in front of him."
  },
  "007559": {
    "text": "a person is walking, turns back and to their left, proceeds to walk again, trips, then turns back once more, limping now.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0953821115011675,
      "Minus Multimodal Distance": -6.420452117919922,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8141539096832275,
      "MoBERT-F": 0.6600099767180745,
      "MoBERT-N": 0.6925059114660475,
      "MoBERT-min(F/N)": 0.6600099767180745,
      "MoBERT-max(F/N)": 0.6925059114660475,
      "MotionCritic": -11.632026672363281,
      "VeMo (human-opt view)": 0.8439897698209718,
      "VeMo (max entropy view)": 0.7981330221703618,
      "VeMo (min entropy view)": 0.8439897698209718,
      "VeMo (random view)": 0.8439897698209718,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking, turns back and to their left, proceeds to walk again, trips, then turns back once more, now limping."
  },
  "000104": {
    "text": "a man walks forward and takes two steps, turns clockwise and then goesni to a sitting position from standing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0686547532388544,
      "Minus Multimodal Distance": -7.664613246917725,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.03451007232069969,
      "MoBERT-F": 0.5388952959373322,
      "MoBERT-N": 0.47264937663933304,
      "MoBERT-min(F/N)": 0.47264937663933304,
      "MoBERT-max(F/N)": 0.5388952959373322,
      "MotionCritic": -13.852873802185059,
      "VeMo (human-opt view)": 0.6657963446475196,
      "VeMo (max entropy view)": 0.6657963446475196,
      "VeMo (min entropy view)": 0.7181818181818181,
      "VeMo (random view)": 0.6657963446475196,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward and takes two steps, turns clockwise, and then goes into a sitting position from a standing position."
  },
  "008227": {
    "text": "a person jumps and stretches out her legs and arms to the sides, pauses, and then performs jumping jacks.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8196903699408552,
      "Minus Multimodal Distance": -8.587156295776367,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.986327052116394,
      "MoBERT-F": 0.8337975672245304,
      "MoBERT-N": 0.8050044781347885,
      "MoBERT-min(F/N)": 0.8050044781347885,
      "MoBERT-max(F/N)": 0.8337975672245304,
      "MotionCritic": -6.252844333648682,
      "VeMo (human-opt view)": 0.6375,
      "VeMo (max entropy view)": 0.6225352112676056,
      "VeMo (min entropy view)": 0.6375,
      "VeMo (random view)": 0.6225352112676056,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person jumps, stretches out their legs and arms to the sides, pauses, and then does jumping jacks."
  },
  "011226": {
    "text": "the person uses the left to grab the right elbow and swing it. the right arm raises up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.497886138124001,
      "Minus Multimodal Distance": -2.904649496078491,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4242095605586655e-05,
      "MoBERT-F": 0.3354704543563133,
      "MoBERT-N": 0.4463041629588036,
      "MoBERT-min(F/N)": 0.3354704543563133,
      "MoBERT-max(F/N)": 0.4463041629588036,
      "MotionCritic": -7.9823079109191895,
      "VeMo (human-opt view)": 0.7880184331797235,
      "VeMo (max entropy view)": 0.7880184331797235,
      "VeMo (min entropy view)": 0.8079911209766926,
      "VeMo (random view)": 0.8079911209766926,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person uses the left hand to grab the right elbow and swing it. The right arm raises up."
  },
  "010026": {
    "text": "the person tries to stable themselves standing still but takes a step forward after losing balance",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.36309775703632763,
      "Minus Multimodal Distance": -4.081328868865967,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.7066187360323966e-05,
      "MoBERT-F": 0.2750541062755557,
      "MoBERT-N": 0.404628784745998,
      "MoBERT-min(F/N)": 0.2750541062755557,
      "MoBERT-max(F/N)": 0.404628784745998,
      "MotionCritic": -3.987523078918457,
      "VeMo (human-opt view)": 0.9465400271370421,
      "VeMo (max entropy view)": 0.9243937232524965,
      "VeMo (min entropy view)": 0.9465400271370421,
      "VeMo (random view)": 0.9465400271370421,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person tries to steady themselves while standing still but takes a step forward after losing balance."
  },
  "009123": {
    "text": "a person moves forward quickly and lifts both legs before landing and continuing to move forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7135650479013237,
      "Minus Multimodal Distance": -6.606102466583252,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0001067283665179275,
      "MoBERT-F": 0.47200530243450833,
      "MoBERT-N": 0.607408238007108,
      "MoBERT-min(F/N)": 0.47200530243450833,
      "MoBERT-max(F/N)": 0.607408238007108,
      "MotionCritic": -5.20024299621582,
      "VeMo (human-opt view)": 0.5784615384615385,
      "VeMo (max entropy view)": 0.5784615384615385,
      "VeMo (min entropy view)": 0.766295707472178,
      "VeMo (random view)": 0.766295707472178,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves forward rapidly, lifts both legs before landing, and then continues to move forward."
  },
  "012423": {
    "text": "a man stands with his arms at his sides, and sways slightly to his left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.18714720341678598,
      "Minus Multimodal Distance": -4.331507682800293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.903889566776343e-05,
      "MoBERT-F": 0.2765336802176569,
      "MoBERT-N": 0.3723009991208288,
      "MoBERT-min(F/N)": 0.2765336802176569,
      "MoBERT-max(F/N)": 0.3723009991208288,
      "MotionCritic": -6.216743469238281,
      "VeMo (human-opt view)": 0.36363636363636365,
      "VeMo (max entropy view)": 0.36363636363636365,
      "VeMo (min entropy view)": 0.3079470198675497,
      "VeMo (random view)": 0.36363636363636365,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man stands with his arms at his sides and sways slightly to his left."
  },
  "013314": {
    "text": "a person stretches his shoulders and arms with help from the opposite hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4684212999143603,
      "Minus Multimodal Distance": -7.3887939453125,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.748448529629968e-05,
      "MoBERT-F": 0.36293801843771295,
      "MoBERT-N": 0.4237675221603313,
      "MoBERT-min(F/N)": 0.36293801843771295,
      "MoBERT-max(F/N)": 0.4237675221603313,
      "MotionCritic": -7.144742488861084,
      "VeMo (human-opt view)": 0.8932038834951457,
      "VeMo (max entropy view)": 0.8808757244043787,
      "VeMo (min entropy view)": 0.8932038834951457,
      "VeMo (random view)": 0.8808757244043787,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stretches his shoulders and arms with the help of the opposite hands."
  },
  "006068": {
    "text": "a walking person suddenly gets staggered to their left, then recovers.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5860413870695126,
      "Minus Multimodal Distance": -7.073083877563477,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.004066393245011568,
      "MoBERT-F": 0.6065071264162473,
      "MoBERT-N": 0.5107841194780782,
      "MoBERT-min(F/N)": 0.5107841194780782,
      "MoBERT-max(F/N)": 0.6065071264162473,
      "MotionCritic": -4.579422950744629,
      "VeMo (human-opt view)": 0.9797595808075619,
      "VeMo (max entropy view)": 0.9498164014687882,
      "VeMo (min entropy view)": 0.9797595808075619,
      "VeMo (random view)": 0.9797595808075619,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A walking person suddenly staggers to their left, then recovers."
  },
  "012388": {
    "text": "this person ducks under something then climbs up and over it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3482276344573318,
      "Minus Multimodal Distance": -7.593226432800293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.867169201374054,
      "MoBERT-F": 0.7476589251321606,
      "MoBERT-N": 0.6458970394949318,
      "MoBERT-min(F/N)": 0.6458970394949318,
      "MoBERT-max(F/N)": 0.7476589251321606,
      "MotionCritic": -7.525228500366211,
      "VeMo (human-opt view)": 0.29411764705882354,
      "VeMo (max entropy view)": 0.3486943164362519,
      "VeMo (min entropy view)": 0.29411764705882354,
      "VeMo (random view)": 0.3486943164362519,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person ducks under something, then climbs up and over it."
  },
  "003430": {
    "text": "a person walks in anticlockwise direction dragging his left feet.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8633458325115055,
      "Minus Multimodal Distance": -3.1870005130767822,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9911506175994873,
      "MoBERT-F": 0.7143625777314788,
      "MoBERT-N": 0.7455851171547265,
      "MoBERT-min(F/N)": 0.7143625777314788,
      "MoBERT-max(F/N)": 0.7455851171547265,
      "MotionCritic": -9.651802062988281,
      "VeMo (human-opt view)": 0.7775891341256367,
      "VeMo (max entropy view)": 0.7435456110154905,
      "VeMo (min entropy view)": 0.7775891341256367,
      "VeMo (random view)": 0.7435456110154905,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in an anticlockwise direction, dragging his left foot."
  },
  "001182": {
    "text": "a person lifts each knee towards the opposite elbow",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.47853061155093346,
      "Minus Multimodal Distance": -11.04417610168457,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.542747749132104e-05,
      "MoBERT-F": 0.27215399510719485,
      "MoBERT-N": 0.3783864604807679,
      "MoBERT-min(F/N)": 0.27215399510719485,
      "MoBERT-max(F/N)": 0.3783864604807679,
      "MotionCritic": -4.569499492645264,
      "VeMo (human-opt view)": 3.0234093829108426e-05,
      "VeMo (max entropy view)": 4.26662748527519e-05,
      "VeMo (min entropy view)": 3.0234093829108426e-05,
      "VeMo (random view)": 4.26662748527519e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts each knee towards the opposite elbow."
  },
  "009485": {
    "text": "a figure raises a weight in front of themself.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.26824693018812873,
      "Minus Multimodal Distance": -10.565340042114258,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7195286747883074e-05,
      "MoBERT-F": 0.42329445961158485,
      "MoBERT-N": 0.4789681624933501,
      "MoBERT-min(F/N)": 0.42329445961158485,
      "MoBERT-max(F/N)": 0.4789681624933501,
      "MotionCritic": -6.4608988761901855,
      "VeMo (human-opt view)": 0.0019297644120095999,
      "VeMo (max entropy view)": 0.012400110223201984,
      "VeMo (min entropy view)": 0.0019297644120095999,
      "VeMo (random view)": 0.012400110223201984,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure raises a weight in front of themselves."
  },
  "011640": {
    "text": "a person walks in an s shape pattern.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7291282144636928,
      "Minus Multimodal Distance": -12.467450141906738,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.38268429040908813,
      "MoBERT-F": 0.6109060615342838,
      "MoBERT-N": 0.644269054599422,
      "MoBERT-min(F/N)": 0.6109060615342838,
      "MoBERT-max(F/N)": 0.644269054599422,
      "MotionCritic": -13.289326667785645,
      "VeMo (human-opt view)": 0.6232686980609419,
      "VeMo (max entropy view)": 0.5615615615615616,
      "VeMo (min entropy view)": 0.6232686980609419,
      "VeMo (random view)": 0.6232686980609419,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in an S-shaped pattern."
  },
  "009716": {
    "text": "the person is doing basketball signals.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.34515805197539406,
      "Minus Multimodal Distance": -9.936662673950195,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00030452676583081484,
      "MoBERT-F": 0.5482249061376049,
      "MoBERT-N": 0.6134340438102381,
      "MoBERT-min(F/N)": 0.5482249061376049,
      "MoBERT-max(F/N)": 0.6134340438102381,
      "MotionCritic": -2.107560157775879,
      "VeMo (human-opt view)": 0.8667496886674969,
      "VeMo (max entropy view)": 0.8667496886674969,
      "VeMo (min entropy view)": 0.8869309838472834,
      "VeMo (random view)": 0.8667496886674969,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is making basketball signals."
  },
  "010752": {
    "text": "staying still then backing up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.15753941217987633,
      "Minus Multimodal Distance": -7.097560882568359,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5880335670080967e-05,
      "MoBERT-F": 0.3861449351876651,
      "MoBERT-N": 0.4800625846541383,
      "MoBERT-min(F/N)": 0.3861449351876651,
      "MoBERT-max(F/N)": 0.4800625846541383,
      "MotionCritic": -5.407838344573975,
      "VeMo (human-opt view)": 0.6923076923076923,
      "VeMo (max entropy view)": 0.6923076923076923,
      "VeMo (min entropy view)": 0.7976878612716763,
      "VeMo (random view)": 0.7976878612716763,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stays still then backs up."
  },
  "003149": {
    "text": "a person standing on one foot holds their right hand up while moving their left foot in a side to side motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5103926881024285,
      "Minus Multimodal Distance": -4.5886406898498535,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.6453485992969945e-05,
      "MoBERT-F": 0.35444720020322507,
      "MoBERT-N": 0.45361643530141954,
      "MoBERT-min(F/N)": 0.35444720020322507,
      "MoBERT-max(F/N)": 0.45361643530141954,
      "MotionCritic": -4.048080921173096,
      "VeMo (human-opt view)": 0.008605851979345954,
      "VeMo (max entropy view)": 0.060240963855421686,
      "VeMo (min entropy view)": 0.008605851979345954,
      "VeMo (random view)": 0.008605851979345954,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person standing on one foot holds their right hand up while moving their left foot in a side-to-side motion."
  },
  "011798": {
    "text": "a person who is standing with his hands by his sides, turns and steps backwards, jogs forward six steps, turns 180 degrees and jogs four steps, then stops and resumes his original position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3101155749187243,
      "Minus Multimodal Distance": -8.565468788146973,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8965539336204529,
      "MoBERT-F": 0.5686552523604247,
      "MoBERT-N": 0.6531076544813059,
      "MoBERT-min(F/N)": 0.5686552523604247,
      "MoBERT-max(F/N)": 0.6531076544813059,
      "MotionCritic": -12.125598907470703,
      "VeMo (human-opt view)": 0.8869690424766019,
      "VeMo (max entropy view)": 0.8869690424766019,
      "VeMo (min entropy view)": 0.8935219657483247,
      "VeMo (random view)": 0.8935219657483247,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who is standing with his hands by his sides turns and steps backwards, jogs forward six steps, turns 180 degrees and jogs four steps, then stops and returns to his original position."
  },
  "009405": {
    "text": "a person walks forward and bends down and grabs his left knee in pain. he attempts to straighten up and walk forward and then bends down to grab his knee again. he then walks backward to his left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.700346748237238,
      "Minus Multimodal Distance": -7.28372859954834,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8489398956298828,
      "MoBERT-F": 0.5102447487725053,
      "MoBERT-N": 0.5678157925458157,
      "MoBERT-min(F/N)": 0.5102447487725053,
      "MoBERT-max(F/N)": 0.5678157925458157,
      "MotionCritic": -7.749262809753418,
      "VeMo (human-opt view)": 0.8738229755178908,
      "VeMo (max entropy view)": 0.8669950738916257,
      "VeMo (min entropy view)": 0.8738229755178908,
      "VeMo (random view)": 0.8669950738916257,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, bends down, and grabs his left knee in pain. He attempts to straighten up, walk forward, and then bends down to grab his knee again. Then, he walks backward to his left."
  },
  "004679": {
    "text": "a person takes their hands from their side places them above their shoulders and then places them back at their side",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6810196695391127,
      "Minus Multimodal Distance": -9.431737899780273,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.1093040383420885e-05,
      "MoBERT-F": 0.352564501115328,
      "MoBERT-N": 0.36239083010991235,
      "MoBERT-min(F/N)": 0.352564501115328,
      "MoBERT-max(F/N)": 0.36239083010991235,
      "MotionCritic": -10.964151382446289,
      "VeMo (human-opt view)": 0.9964162810015427,
      "VeMo (max entropy view)": 0.9954009532569613,
      "VeMo (min entropy view)": 0.9964162810015427,
      "VeMo (random view)": 0.9964162810015427,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes their hands from their sides, places them above their shoulders, and then places them back at their sides."
  },
  "004807": {
    "text": "a person on a swivel chair moved from front to left typing and talking on the phone motions.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6600426646823319,
      "Minus Multimodal Distance": -8.531012535095215,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.390239023952745e-05,
      "MoBERT-F": 0.3308405248683889,
      "MoBERT-N": 0.4313976951556506,
      "MoBERT-min(F/N)": 0.3308405248683889,
      "MoBERT-max(F/N)": 0.4313976951556506,
      "MotionCritic": -8.05221176147461,
      "VeMo (human-opt view)": 2.0798171626901015e-05,
      "VeMo (max entropy view)": 0.00029553869398462533,
      "VeMo (min entropy view)": 2.0798171626901015e-05,
      "VeMo (random view)": 0.00029553869398462533,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person on a swivel chair moves from the front to the left, making typing and talking-on-the-phone motions."
  },
  "004222": {
    "text": "a person walks in a clockwise circle and raises their hand to their face to yawn.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5510527965426322,
      "Minus Multimodal Distance": -1.5316317081451416,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.804277341463603e-05,
      "MoBERT-F": 0.28844810822417855,
      "MoBERT-N": 0.5167473150248978,
      "MoBERT-min(F/N)": 0.28844810822417855,
      "MoBERT-max(F/N)": 0.5167473150248978,
      "MotionCritic": -11.470871925354004,
      "VeMo (human-opt view)": 0.7318757192174914,
      "VeMo (max entropy view)": 0.7318757192174914,
      "VeMo (min entropy view)": 0.7436743674367436,
      "VeMo (random view)": 0.7436743674367436,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in a clockwise circle and raises their hand to their face to yawn."
  },
  "006457": {
    "text": "a person catches a ball with their left arm then throws it with their right arm.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.477369109729395,
      "Minus Multimodal Distance": -7.699883937835693,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0035583339631557465,
      "MoBERT-F": 0.5224272017559538,
      "MoBERT-N": 0.5083897900247479,
      "MoBERT-min(F/N)": 0.5083897900247479,
      "MoBERT-max(F/N)": 0.5224272017559538,
      "MotionCritic": -7.210408687591553,
      "VeMo (human-opt view)": 0.10089399744572158,
      "VeMo (max entropy view)": 0.10089399744572158,
      "VeMo (min entropy view)": 0.07578558225508318,
      "VeMo (random view)": 0.10089399744572158,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person catches a ball with their left arm and then throws it with their right arm."
  },
  "009777": {
    "text": "a person lifts their right forearm upwards towards their mouth and then puts it down again.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.18484935522272483,
      "Minus Multimodal Distance": -7.727227687835693,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.393410199554637e-05,
      "MoBERT-F": 0.27905451759223054,
      "MoBERT-N": 0.3765795318459729,
      "MoBERT-min(F/N)": 0.27905451759223054,
      "MoBERT-max(F/N)": 0.3765795318459729,
      "MotionCritic": -9.462854385375977,
      "VeMo (human-opt view)": 0.9324618736383442,
      "VeMo (max entropy view)": 0.6653306613226453,
      "VeMo (min entropy view)": 0.9324618736383442,
      "VeMo (random view)": 0.6653306613226453,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lifts their right forearm upward toward their mouth and then puts it down again."
  },
  "002486": {
    "text": "a person is walking forwards, but stumbles and steps back, then carries on forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6655146065171311,
      "Minus Multimodal Distance": -5.453967571258545,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005095560918562114,
      "MoBERT-F": 0.5091922754836249,
      "MoBERT-N": 0.5500759624573665,
      "MoBERT-min(F/N)": 0.5091922754836249,
      "MoBERT-max(F/N)": 0.5500759624573665,
      "MotionCritic": -6.103320121765137,
      "VeMo (human-opt view)": 0.9944506104328524,
      "VeMo (max entropy view)": 0.9840955586352538,
      "VeMo (min entropy view)": 0.9944506104328524,
      "VeMo (random view)": 0.9944506104328524,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking forward, but stumbles and steps back, then carries on forward."
  },
  "011595": {
    "text": "a person hustles down a short flight of steps before coming to a stop.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.38129288718931886,
      "Minus Multimodal Distance": -6.839916706085205,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0002829694713000208,
      "MoBERT-F": 0.4490593005748315,
      "MoBERT-N": 0.5557319342425444,
      "MoBERT-min(F/N)": 0.4490593005748315,
      "MoBERT-max(F/N)": 0.5557319342425444,
      "MotionCritic": -3.863856792449951,
      "VeMo (human-opt view)": 0.14827201783723523,
      "VeMo (max entropy view)": 0.14827201783723523,
      "VeMo (min entropy view)": 0.020354128744001325,
      "VeMo (random view)": 0.020354128744001325,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person hustles down a short flight of steps before coming to a stop."
  },
  "012356": {
    "text": "a person is dancing and starts to take a drink with their left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4561433712001316,
      "Minus Multimodal Distance": -9.079596519470215,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.1500181396258995e-05,
      "MoBERT-F": 0.43832428862079986,
      "MoBERT-N": 0.4587196526059757,
      "MoBERT-min(F/N)": 0.43832428862079986,
      "MoBERT-max(F/N)": 0.4587196526059757,
      "MotionCritic": -9.761762619018555,
      "VeMo (human-opt view)": 0.017902813299232736,
      "VeMo (max entropy view)": 0.017902813299232736,
      "VeMo (min entropy view)": 0.006695069993913573,
      "VeMo (random view)": 0.017902813299232736,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is dancing and starts to take a drink with their left hand."
  },
  "009986": {
    "text": "walking forward making a 90 degree turn to the left at a gradual angle",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6114083214536897,
      "Minus Multimodal Distance": -9.349882125854492,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2431737306760624e-05,
      "MoBERT-F": 0.33152364261863543,
      "MoBERT-N": 0.4924534897282998,
      "MoBERT-min(F/N)": 0.33152364261863543,
      "MoBERT-max(F/N)": 0.4924534897282998,
      "MotionCritic": -10.279441833496094,
      "VeMo (human-opt view)": 0.9284253578732107,
      "VeMo (max entropy view)": 0.9284253578732107,
      "VeMo (min entropy view)": 0.9363077917879081,
      "VeMo (random view)": 0.9363077917879081,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking forward, making a 90-degree turn to the left at a gradual angle."
  },
  "013411": {
    "text": "a man movea his right hand as if to wipe down a tabletop.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4623819442563673,
      "Minus Multimodal Distance": -5.163479328155518,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.602751166909002e-05,
      "MoBERT-F": 0.3087554106391845,
      "MoBERT-N": 0.38521319073571086,
      "MoBERT-min(F/N)": 0.3087554106391845,
      "MoBERT-max(F/N)": 0.38521319073571086,
      "MotionCritic": -2.8889713287353516,
      "VeMo (human-opt view)": 0.07567127746135069,
      "VeMo (max entropy view)": 0.7985739750445633,
      "VeMo (min entropy view)": 0.07567127746135069,
      "VeMo (random view)": 0.7985739750445633,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man moves his right hand as if to wipe down a tabletop."
  },
  "013665": {
    "text": "a person walks forward with exaggerated backward kicks with every step.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5938096450420769,
      "Minus Multimodal Distance": -3.6397650241851807,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9527782201766968,
      "MoBERT-F": 0.6944845930969897,
      "MoBERT-N": 0.6188283459660088,
      "MoBERT-min(F/N)": 0.6188283459660088,
      "MoBERT-max(F/N)": 0.6944845930969897,
      "MotionCritic": -6.117439270019531,
      "VeMo (human-opt view)": 0.8934010152284264,
      "VeMo (max entropy view)": 0.8934010152284264,
      "VeMo (min entropy view)": 0.9324991768192296,
      "VeMo (random view)": 0.8934010152284264,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, taking each step with exaggerated backward kicks."
  },
  "010183": {
    "text": "figure appears to be carrying an itemm (large item)",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8229743820468647,
      "Minus Multimodal Distance": -8.86568832397461,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.1758859626716e-05,
      "MoBERT-F": 0.43980665010150527,
      "MoBERT-N": 0.5002255399752622,
      "MoBERT-min(F/N)": 0.43980665010150527,
      "MoBERT-max(F/N)": 0.5002255399752622,
      "MotionCritic": -3.331418991088867,
      "VeMo (human-opt view)": 0.0003803936845482252,
      "VeMo (max entropy view)": 0.0003803936845482252,
      "VeMo (min entropy view)": 7.985877333028615e-05,
      "VeMo (random view)": 7.985877333028615e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The figure appears to be carrying an item (a large item)."
  },
  "009600": {
    "text": "a person ballet dances in a triangle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.09901629318297,
      "Minus Multimodal Distance": -13.845466613769531,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9336962103843689,
      "MoBERT-F": 0.7964109632262922,
      "MoBERT-N": 0.7532799201788798,
      "MoBERT-min(F/N)": 0.7532799201788798,
      "MoBERT-max(F/N)": 0.7964109632262922,
      "MotionCritic": -6.3568902015686035,
      "VeMo (human-opt view)": 0.004073842991889016,
      "VeMo (max entropy view)": 0.012420022581859239,
      "VeMo (min entropy view)": 0.004073842991889016,
      "VeMo (random view)": 0.004073842991889016,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person dances ballet in a triangle."
  },
  "005321": {
    "text": "a person who is standing with his hands at this sides reaches down to his left, picks up something, moves the object to his right and places it down and returns to his standing position with his hands to his sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.15456488387337755,
      "Minus Multimodal Distance": -10.058786392211914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.415886385482736e-05,
      "MoBERT-F": 0.30128624431516915,
      "MoBERT-N": 0.44945731944682477,
      "MoBERT-min(F/N)": 0.30128624431516915,
      "MoBERT-max(F/N)": 0.44945731944682477,
      "MotionCritic": -5.013579368591309,
      "VeMo (human-opt view)": 0.8352490421455939,
      "VeMo (max entropy view)": 0.8352490421455939,
      "VeMo (min entropy view)": 0.8444902162718847,
      "VeMo (random view)": 0.8352490421455939,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who is standing with his hands at his sides reaches down to his left, picks up something, moves the object to his right, places it down, and returns to his standing position with his hands at his sides."
  },
  "010874": {
    "text": "a man brings his hands down to his thighs, standing with his knees bent, before extending his arms to either side and then bringing them back down to his thighs.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.1611844897854531,
      "Minus Multimodal Distance": -9.177553176879883,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4887681977124885e-05,
      "MoBERT-F": 0.3338939368124675,
      "MoBERT-N": 0.44708481819489015,
      "MoBERT-min(F/N)": 0.3338939368124675,
      "MoBERT-max(F/N)": 0.44708481819489015,
      "MotionCritic": -13.131538391113281,
      "VeMo (human-opt view)": 0.7661870503597122,
      "VeMo (max entropy view)": 0.7543859649122807,
      "VeMo (min entropy view)": 0.7661870503597122,
      "VeMo (random view)": 0.7661870503597122,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man brings his hands down to his thighs, standing with his knees bent, before extending his arms to either side and then bringing them back down to his thighs."
  },
  "014021": {
    "text": "a person's squats down using mainly their right leg, their left leg crosses their right leg, and then they stand back up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5081180384186872,
      "Minus Multimodal Distance": -9.308822631835938,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7268493175506592,
      "MoBERT-F": 0.5677133498554607,
      "MoBERT-N": 0.5685839869785151,
      "MoBERT-min(F/N)": 0.5677133498554607,
      "MoBERT-max(F/N)": 0.5685839869785151,
      "MotionCritic": -4.959405422210693,
      "VeMo (human-opt view)": 0.8597748208802457,
      "VeMo (max entropy view)": 0.7435897435897436,
      "VeMo (min entropy view)": 0.8597748208802457,
      "VeMo (random view)": 0.7435897435897436,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats down using mainly their right leg, with their left leg crossing their right leg, and then they stand back up."
  },
  "005119": {
    "text": "person appears to be holding some thing with both hands and then throws it forward with their right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6065492986833737,
      "Minus Multimodal Distance": -5.125710487365723,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.11849687248468399,
      "MoBERT-F": 0.6330590873088899,
      "MoBERT-N": 0.5734306444584739,
      "MoBERT-min(F/N)": 0.5734306444584739,
      "MoBERT-max(F/N)": 0.6330590873088899,
      "MotionCritic": -5.893234729766846,
      "VeMo (human-opt view)": 0.2017075773745998,
      "VeMo (max entropy view)": 0.2017075773745998,
      "VeMo (min entropy view)": 0.16443189837685251,
      "VeMo (random view)": 0.16443189837685251,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person appears to be holding something with both hands and then throws it forward with their right hand."
  },
  "010443": {
    "text": "a person walks in a clockwise circle swaying one arm and keeping the other arm still",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.42690573010662014,
      "Minus Multimodal Distance": -10.867844581604004,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.176621925784275e-05,
      "MoBERT-F": 0.4184188172630432,
      "MoBERT-N": 0.53683571137125,
      "MoBERT-min(F/N)": 0.4184188172630432,
      "MoBERT-max(F/N)": 0.53683571137125,
      "MotionCritic": -8.753254890441895,
      "VeMo (human-opt view)": 0.7661290322580645,
      "VeMo (max entropy view)": 0.7661290322580645,
      "VeMo (min entropy view)": 0.7885714285714286,
      "VeMo (random view)": 0.7661290322580645,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in a clockwise circle, swaying one arm and keeping the other arm still."
  },
  "006755": {
    "text": "a person moves to the left side and then to the right side, then stops",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6589470768429243,
      "Minus Multimodal Distance": -1.9177800416946411,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.019718356430530548,
      "MoBERT-F": 0.5281714044717807,
      "MoBERT-N": 0.5745784646632092,
      "MoBERT-min(F/N)": 0.5281714044717807,
      "MoBERT-max(F/N)": 0.5745784646632092,
      "MotionCritic": -1.6070302724838257,
      "VeMo (human-opt view)": 0.946712802768166,
      "VeMo (max entropy view)": 0.8931245745405038,
      "VeMo (min entropy view)": 0.946712802768166,
      "VeMo (random view)": 0.946712802768166,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves to the left side, then to the right side, and then stops."
  },
  "012691": {
    "text": "the person took a large side step to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3833638770950279,
      "Minus Multimodal Distance": -7.270864009857178,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.587273411336355e-05,
      "MoBERT-F": 0.4101169269909126,
      "MoBERT-N": 0.5581473194423281,
      "MoBERT-min(F/N)": 0.4101169269909126,
      "MoBERT-max(F/N)": 0.5581473194423281,
      "MotionCritic": -0.9820879101753235,
      "VeMo (human-opt view)": 0.9604943721032885,
      "VeMo (max entropy view)": 0.9579572969403478,
      "VeMo (min entropy view)": 0.9604943721032885,
      "VeMo (random view)": 0.9579572969403478,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person took a large side step to the right."
  },
  "008730": {
    "text": "person is doing a huge stretch  to the right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6224526026124111,
      "Minus Multimodal Distance": -9.863646507263184,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2920576157048345e-05,
      "MoBERT-F": 0.43466392002318477,
      "MoBERT-N": 0.5043534700110403,
      "MoBERT-min(F/N)": 0.43466392002318477,
      "MoBERT-max(F/N)": 0.5043534700110403,
      "MotionCritic": -5.725685119628906,
      "VeMo (human-opt view)": 0.8932515337423312,
      "VeMo (max entropy view)": 0.8667020711630377,
      "VeMo (min entropy view)": 0.8932515337423312,
      "VeMo (random view)": 0.8667020711630377,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is doing a huge stretch to the right."
  },
  "006570": {
    "text": "a person stands still with their arms stretched out.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.17427090354828817,
      "Minus Multimodal Distance": -4.0189595222473145,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.522792030707933e-05,
      "MoBERT-F": 0.3629887427894186,
      "MoBERT-N": 0.44176857267595565,
      "MoBERT-min(F/N)": 0.3629887427894186,
      "MoBERT-max(F/N)": 0.44176857267595565,
      "MotionCritic": -2.9126944541931152,
      "VeMo (human-opt view)": 0.9987550850365142,
      "VeMo (max entropy view)": 0.9987541203021436,
      "VeMo (min entropy view)": 0.9987550850365142,
      "VeMo (random view)": 0.9987541203021436,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still with their arms stretched out."
  },
  "011035": {
    "text": "a person is walking then stops and sits down",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.46561918778786443,
      "Minus Multimodal Distance": -3.892195224761963,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9639747738838196,
      "MoBERT-F": 0.583583595646731,
      "MoBERT-N": 0.5204416262011748,
      "MoBERT-min(F/N)": 0.5204416262011748,
      "MoBERT-max(F/N)": 0.583583595646731,
      "MotionCritic": -8.96231746673584,
      "VeMo (human-opt view)": 0.3216,
      "VeMo (max entropy view)": 0.3216,
      "VeMo (min entropy view)": 0.9707551287647316,
      "VeMo (random view)": 0.9707551287647316,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking, then stops and sits down."
  },
  "012235": {
    "text": "a person is doing acting like a little teapot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.28604214364254305,
      "Minus Multimodal Distance": -8.005782127380371,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.688860306283459e-05,
      "MoBERT-F": 0.46559454183391413,
      "MoBERT-N": 0.45272053579808347,
      "MoBERT-min(F/N)": 0.45272053579808347,
      "MoBERT-max(F/N)": 0.46559454183391413,
      "MotionCritic": -6.545135974884033,
      "VeMo (human-opt view)": 0.10056214865708932,
      "VeMo (max entropy view)": 0.2456575682382134,
      "VeMo (min entropy view)": 0.10056214865708932,
      "VeMo (random view)": 0.2456575682382134,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is acting like a little teapot."
  },
  "008052": {
    "text": "a person delivers pizza, receives money, then waves goodbye",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4216527088536039,
      "Minus Multimodal Distance": -3.4594950675964355,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.1404353143880144e-05,
      "MoBERT-F": 0.29607026688459925,
      "MoBERT-N": 0.356557336072393,
      "MoBERT-min(F/N)": 0.29607026688459925,
      "MoBERT-max(F/N)": 0.356557336072393,
      "MotionCritic": -6.5715718269348145,
      "VeMo (human-opt view)": 0.023001095290251915,
      "VeMo (max entropy view)": 0.023001095290251915,
      "VeMo (min entropy view)": 0.019142234068330505,
      "VeMo (random view)": 0.023001095290251915,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person delivers pizza, receives money, then waves goodbye."
  },
  "000304": {
    "text": "a person is walking in a steady forward motion.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3843110677542149,
      "Minus Multimodal Distance": -5.792022705078125,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.02679591067135334,
      "MoBERT-F": 0.6216493809092997,
      "MoBERT-N": 0.548788702568071,
      "MoBERT-min(F/N)": 0.548788702568071,
      "MoBERT-max(F/N)": 0.6216493809092997,
      "MotionCritic": -3.6610965728759766,
      "VeMo (human-opt view)": 0.9580838323353293,
      "VeMo (max entropy view)": 0.9580838323353293,
      "VeMo (min entropy view)": 0.9795918367346939,
      "VeMo (random view)": 0.9580838323353293,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking in a steady forward motion."
  },
  "010617": {
    "text": "a person walks forward then climbs up something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3767708179863088,
      "Minus Multimodal Distance": -5.713298797607422,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0016263610450550914,
      "MoBERT-F": 0.5393380394409153,
      "MoBERT-N": 0.6606714783683175,
      "MoBERT-min(F/N)": 0.5393380394409153,
      "MoBERT-max(F/N)": 0.6606714783683175,
      "MotionCritic": -0.7490171790122986,
      "VeMo (human-opt view)": 0.05657805044308112,
      "VeMo (max entropy view)": 0.07160493827160494,
      "VeMo (min entropy view)": 0.05657805044308112,
      "VeMo (random view)": 0.05657805044308112,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and then climbs up something."
  },
  "004311": {
    "text": "a person stumbles around like they are drunk.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7623954322400323,
      "Minus Multimodal Distance": -3.2048239707946777,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.04370816797018051,
      "MoBERT-F": 0.6460151985735919,
      "MoBERT-N": 0.5819766885300478,
      "MoBERT-min(F/N)": 0.5819766885300478,
      "MoBERT-max(F/N)": 0.6460151985735919,
      "MotionCritic": -8.416435241699219,
      "VeMo (human-opt view)": 0.899135446685879,
      "VeMo (max entropy view)": 0.859721082854799,
      "VeMo (min entropy view)": 0.899135446685879,
      "VeMo (random view)": 0.859721082854799,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stumbles around as if they are drunk."
  },
  "007878": {
    "text": "person backed up and is doing a salute",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8240888589425304,
      "Minus Multimodal Distance": -6.5699262619018555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9895294308662415,
      "MoBERT-F": 0.7194856691124831,
      "MoBERT-N": 0.6111547333057131,
      "MoBERT-min(F/N)": 0.6111547333057131,
      "MoBERT-max(F/N)": 0.7194856691124831,
      "MotionCritic": -5.975443363189697,
      "VeMo (human-opt view)": 0.0004061785948895044,
      "VeMo (max entropy view)": 0.027624309392265192,
      "VeMo (min entropy view)": 0.0004061785948895044,
      "VeMo (random view)": 0.0004061785948895044,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person backed up and is saluting."
  },
  "006658": {
    "text": "the person walks backwards in a straight line",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.25123953840894975,
      "Minus Multimodal Distance": -12.059712409973145,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9952186942100525,
      "MoBERT-F": 0.7982366321119194,
      "MoBERT-N": 0.646012836335657,
      "MoBERT-min(F/N)": 0.646012836335657,
      "MoBERT-max(F/N)": 0.7982366321119194,
      "MotionCritic": -7.433884143829346,
      "VeMo (human-opt view)": 0.7558139534883721,
      "VeMo (max entropy view)": 0.7558139534883721,
      "VeMo (min entropy view)": 0.9553222153592753,
      "VeMo (random view)": 0.7558139534883721,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walks backwards in a straight line."
  },
  "013474": {
    "text": "the person put his hands on his knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5947028414410152,
      "Minus Multimodal Distance": -3.6388678550720215,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8863090847153217e-05,
      "MoBERT-F": 0.34150901750521845,
      "MoBERT-N": 0.4268979228223698,
      "MoBERT-min(F/N)": 0.34150901750521845,
      "MoBERT-max(F/N)": 0.4268979228223698,
      "MotionCritic": -4.066012859344482,
      "VeMo (human-opt view)": 0.9101123595505618,
      "VeMo (max entropy view)": 0.9047013977128335,
      "VeMo (min entropy view)": 0.9101123595505618,
      "VeMo (random view)": 0.9047013977128335,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person put his hands on his knees."
  },
  "000576": {
    "text": "a person walking around bouncing a ball.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9443876144179195,
      "Minus Multimodal Distance": -13.41651725769043,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.6462195515632629,
      "MoBERT-F": 0.6804262080066869,
      "MoBERT-N": 0.6630557371685134,
      "MoBERT-min(F/N)": 0.6630557371685134,
      "MoBERT-max(F/N)": 0.6804262080066869,
      "MotionCritic": -15.33037281036377,
      "VeMo (human-opt view)": 4.921226452331821e-06,
      "VeMo (max entropy view)": 1.0454986901596815e-05,
      "VeMo (min entropy view)": 4.921226452331821e-06,
      "VeMo (random view)": 1.0454986901596815e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking around, bouncing a ball."
  },
  "005869": {
    "text": "a person is walking around the room.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9609095890182645,
      "Minus Multimodal Distance": -3.286804676055908,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8670523762702942,
      "MoBERT-F": 0.7662833083941636,
      "MoBERT-N": 0.660549283374083,
      "MoBERT-min(F/N)": 0.660549283374083,
      "MoBERT-max(F/N)": 0.7662833083941636,
      "MotionCritic": -14.382436752319336,
      "VeMo (human-opt view)": 0.9908640769928168,
      "VeMo (max entropy view)": 0.9908640769928168,
      "VeMo (min entropy view)": 0.9919429239190602,
      "VeMo (random view)": 0.9908640769928168,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking around the room."
  },
  "006987": {
    "text": "a person tapping on a surface",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3093845121627305,
      "Minus Multimodal Distance": -10.340849876403809,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7571953978622332e-05,
      "MoBERT-F": 0.3441656137539628,
      "MoBERT-N": 0.42143279433558645,
      "MoBERT-min(F/N)": 0.3441656137539628,
      "MoBERT-max(F/N)": 0.42143279433558645,
      "MotionCritic": -7.600765705108643,
      "VeMo (human-opt view)": 0.0002612653493392737,
      "VeMo (max entropy view)": 0.003832616347281971,
      "VeMo (min entropy view)": 0.0002612653493392737,
      "VeMo (random view)": 0.003832616347281971,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is tapping on a surface."
  },
  "011146": {
    "text": "the figure rises from a laying position and walks in a clockwise circle, and then lays back down the ground.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.279591953752742,
      "Minus Multimodal Distance": -5.020407199859619,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9764931201934814,
      "MoBERT-F": 0.5585349358472105,
      "MoBERT-N": 0.48100894634701985,
      "MoBERT-min(F/N)": 0.48100894634701985,
      "MoBERT-max(F/N)": 0.5585349358472105,
      "MotionCritic": -9.094671249389648,
      "VeMo (human-opt view)": 0.9769274057400112,
      "VeMo (max entropy view)": 0.9769274057400112,
      "VeMo (min entropy view)": 0.981936887921654,
      "VeMo (random view)": 0.9769274057400112,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure rises from a lying position and walks in a clockwise circle, then lies back down on the ground."
  },
  "010843": {
    "text": "a person holds their right arm out on something to support them while sticking their right leg up to balance.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.47935315360703157,
      "Minus Multimodal Distance": -10.833110809326172,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.45267256104853e-05,
      "MoBERT-F": 0.36128028637049603,
      "MoBERT-N": 0.4112210848811817,
      "MoBERT-min(F/N)": 0.36128028637049603,
      "MoBERT-max(F/N)": 0.4112210848811817,
      "MotionCritic": -2.9536478519439697,
      "VeMo (human-opt view)": 0.9327046720960137,
      "VeMo (max entropy view)": 0.9197475202885482,
      "VeMo (min entropy view)": 0.9327046720960137,
      "VeMo (random view)": 0.9327046720960137,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds their right arm out on something to support themselves while sticking their right leg up to balance."
  },
  "012498": {
    "text": "pretend to hold a ball in your right hand, toss the ball upward and kick it with your right foot as it falls toward your foot. take steps forward and keep going out of the area.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8488576632007598,
      "Minus Multimodal Distance": -5.708377361297607,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9890285730361938,
      "MoBERT-F": 0.7289352513060796,
      "MoBERT-N": 0.6021146407274075,
      "MoBERT-min(F/N)": 0.6021146407274075,
      "MoBERT-max(F/N)": 0.7289352513060796,
      "MotionCritic": -5.066786289215088,
      "VeMo (human-opt view)": 0.6072351421188631,
      "VeMo (max entropy view)": 0.6072351421188631,
      "VeMo (min entropy view)": 0.6931567328918322,
      "VeMo (random view)": 0.6072351421188631,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person pretends to hold a ball in their right hand, toss the ball upward and kick it with their right foot as it falls toward their foot.  This person takes steps forward and keeps going out of the area."
  },
  "001725": {
    "text": "a man limps to the right side of the room, then runs back to the left side",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8962141984354488,
      "Minus Multimodal Distance": -10.037099838256836,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.04104138910770416,
      "MoBERT-F": 0.6234701921766155,
      "MoBERT-N": 0.6575301857929694,
      "MoBERT-min(F/N)": 0.6234701921766155,
      "MoBERT-max(F/N)": 0.6575301857929694,
      "MotionCritic": -5.0665411949157715,
      "VeMo (human-opt view)": 0.9284928492849285,
      "VeMo (max entropy view)": 0.9148825065274151,
      "VeMo (min entropy view)": 0.9284928492849285,
      "VeMo (random view)": 0.9148825065274151,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man limps to the right side of the room, then runs back to the left side."
  },
  "008173": {
    "text": "the sim appears to push something, then brings both hands to their right ear.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.383405542173237,
      "Minus Multimodal Distance": -9.281210899353027,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0135612178128213e-05,
      "MoBERT-F": 0.3142264003560363,
      "MoBERT-N": 0.43526126659234793,
      "MoBERT-min(F/N)": 0.3142264003560363,
      "MoBERT-max(F/N)": 0.43526126659234793,
      "MotionCritic": -10.259340286254883,
      "VeMo (human-opt view)": 0.7310704960835509,
      "VeMo (max entropy view)": 0.7310704960835509,
      "VeMo (min entropy view)": 0.25663716814159293,
      "VeMo (random view)": 0.7310704960835509,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The sim appears to push something, then brings both hands to its right ear."
  },
  "012099": {
    "text": "a person lifts something to their face and wobbles their body in circles.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.47286138782121195,
      "Minus Multimodal Distance": -9.443944931030273,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.8649603084195405e-05,
      "MoBERT-F": 0.29913088859613374,
      "MoBERT-N": 0.3390757915498717,
      "MoBERT-min(F/N)": 0.29913088859613374,
      "MoBERT-max(F/N)": 0.3390757915498717,
      "MotionCritic": -7.816077709197998,
      "VeMo (human-opt view)": 0.607940446650124,
      "VeMo (max entropy view)": 0.607940446650124,
      "VeMo (min entropy view)": 0.00010920887803937943,
      "VeMo (random view)": 0.607940446650124,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts something to their face and wobbles their body in circles."
  },
  "006172": {
    "text": "a person holds their hands together and bows their head a few times.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.36548059096611524,
      "Minus Multimodal Distance": -9.158904075622559,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9127239031367935e-05,
      "MoBERT-F": 0.3560790455222455,
      "MoBERT-N": 0.393137653080807,
      "MoBERT-min(F/N)": 0.3560790455222455,
      "MoBERT-max(F/N)": 0.393137653080807,
      "MotionCritic": -12.055160522460938,
      "VeMo (human-opt view)": 0.5469061876247505,
      "VeMo (max entropy view)": 0.5469061876247505,
      "VeMo (min entropy view)": 0.21112929623567922,
      "VeMo (random view)": 0.21112929623567922,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person holds their hands together and bows their head a few times."
  },
  "005561": {
    "text": "the sim appears to be standing an object onto their right side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2123225929956279,
      "Minus Multimodal Distance": -6.117486000061035,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4640617994009517e-05,
      "MoBERT-F": 0.33965686467474143,
      "MoBERT-N": 0.48263439615897064,
      "MoBERT-min(F/N)": 0.33965686467474143,
      "MoBERT-max(F/N)": 0.48263439615897064,
      "MotionCritic": -2.8861966133117676,
      "VeMo (human-opt view)": 0.6219512195121951,
      "VeMo (max entropy view)": 0.6219512195121951,
      "VeMo (min entropy view)": 0.6794625719769674,
      "VeMo (random view)": 0.6794625719769674,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person appears to be standing beside an object on their right side."
  },
  "005156": {
    "text": "a person walks forward and kicks their leg out while turning around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5405557196742927,
      "Minus Multimodal Distance": -7.560057640075684,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9943780303001404,
      "MoBERT-F": 0.7222474318731485,
      "MoBERT-N": 0.7173045048201936,
      "MoBERT-min(F/N)": 0.7173045048201936,
      "MoBERT-max(F/N)": 0.7222474318731485,
      "MotionCritic": -8.167991638183594,
      "VeMo (human-opt view)": 0.9840841650930672,
      "VeMo (max entropy view)": 0.9840841650930672,
      "VeMo (min entropy view)": 0.9902897292176521,
      "VeMo (random view)": 0.9840841650930672,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, kicks their leg out, and turns around."
  },
  "008927": {
    "text": "person standing raises right knee upward, then puts foot back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.39746121725014555,
      "Minus Multimodal Distance": -7.795342445373535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3221446099341847e-05,
      "MoBERT-F": 0.35187229308354484,
      "MoBERT-N": 0.45502988712199877,
      "MoBERT-min(F/N)": 0.35187229308354484,
      "MoBERT-max(F/N)": 0.45502988712199877,
      "MotionCritic": -5.3225998878479,
      "VeMo (human-opt view)": 0.9956784788245462,
      "VeMo (max entropy view)": 0.9937022220461837,
      "VeMo (min entropy view)": 0.9956784788245462,
      "VeMo (random view)": 0.9956784788245462,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing raises their right knee upward, then puts their foot back down."
  },
  "013586": {
    "text": "a person takes a large hop forward with both legs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4088804956927308,
      "Minus Multimodal Distance": -6.373057842254639,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.373163624724839e-05,
      "MoBERT-F": 0.4675062205844904,
      "MoBERT-N": 0.5484053680427544,
      "MoBERT-min(F/N)": 0.4675062205844904,
      "MoBERT-max(F/N)": 0.5484053680427544,
      "MotionCritic": -3.510209560394287,
      "VeMo (human-opt view)": 0.8265682656826568,
      "VeMo (max entropy view)": 0.4072398190045249,
      "VeMo (min entropy view)": 0.8265682656826568,
      "VeMo (random view)": 0.4072398190045249,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes a large hop forward with both legs."
  },
  "008174": {
    "text": "a person grabbed something and made some jesture",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.47952634006010547,
      "Minus Multimodal Distance": -4.496978282928467,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.773934647848364e-05,
      "MoBERT-F": 0.34800422063670794,
      "MoBERT-N": 0.38882930176101654,
      "MoBERT-min(F/N)": 0.34800422063670794,
      "MoBERT-max(F/N)": 0.38882930176101654,
      "MotionCritic": -8.714946746826172,
      "VeMo (human-opt view)": 0.8809648662821186,
      "VeMo (max entropy view)": 0.8804283164782868,
      "VeMo (min entropy view)": 0.8809648662821186,
      "VeMo (random view)": 0.8809648662821186,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person grabbed something and made some gesture."
  },
  "006343": {
    "text": "the person is hammering a nail.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.29013843402403494,
      "Minus Multimodal Distance": -7.137883186340332,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.008708128822036e-05,
      "MoBERT-F": 0.33220262617659496,
      "MoBERT-N": 0.4582405380081952,
      "MoBERT-min(F/N)": 0.33220262617659496,
      "MoBERT-max(F/N)": 0.4582405380081952,
      "MotionCritic": -2.4642882347106934,
      "VeMo (human-opt view)": 0.05681515070685516,
      "VeMo (max entropy view)": 0.05681515070685516,
      "VeMo (min entropy view)": 0.05333333333333334,
      "VeMo (random view)": 0.05333333333333334,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is hammering a nail."
  },
  "001152": {
    "text": "a person performs a typical broadjump.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8672864326238993,
      "Minus Multimodal Distance": -11.704950332641602,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.13168402016162872,
      "MoBERT-F": 0.7413297687907322,
      "MoBERT-N": 0.6138154215540779,
      "MoBERT-min(F/N)": 0.6138154215540779,
      "MoBERT-max(F/N)": 0.7413297687907322,
      "MotionCritic": -1.2427936792373657,
      "VeMo (human-opt view)": 0.7874015748031497,
      "VeMo (max entropy view)": 0.7874015748031497,
      "VeMo (min entropy view)": 0.8348745046235139,
      "VeMo (random view)": 0.7874015748031497,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person performs a typical broad jump."
  },
  "004331": {
    "text": "someone dusts a picture hanging on the wall with a cloth in their right hand, steadies the picture with their left hand, then finishes dusting it, and finally dusts all the way around the sides of the frame.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3015392664402998,
      "Minus Multimodal Distance": -4.563384532928467,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.786765152355656e-05,
      "MoBERT-F": 0.32846938297852946,
      "MoBERT-N": 0.4254858212643716,
      "MoBERT-min(F/N)": 0.32846938297852946,
      "MoBERT-max(F/N)": 0.4254858212643716,
      "MotionCritic": -11.214766502380371,
      "VeMo (human-opt view)": 7.733308698216615e-05,
      "VeMo (max entropy view)": 0.03732590529247911,
      "VeMo (min entropy view)": 7.733308698216615e-05,
      "VeMo (random view)": 7.733308698216615e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone dusts a picture hanging on the wall with a cloth in their right hand, steadies the picture with their left hand, finishes dusting it, and finally dusts all around the sides of the frame."
  },
  "011412": {
    "text": "a  person doing a limping walk, taking proper step with the right foot and limping with the left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6247349867103621,
      "Minus Multimodal Distance": -4.035017967224121,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.17705410718917847,
      "MoBERT-F": 0.6172412951726802,
      "MoBERT-N": 0.5914352265180822,
      "MoBERT-min(F/N)": 0.5914352265180822,
      "MoBERT-max(F/N)": 0.6172412951726802,
      "MotionCritic": -4.895507335662842,
      "VeMo (human-opt view)": 0.7773722627737226,
      "VeMo (max entropy view)": 0.7546468401486989,
      "VeMo (min entropy view)": 0.7773722627737226,
      "VeMo (random view)": 0.7546468401486989,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person doing a limping walk, taking proper step with the right foot and limping with the left."
  },
  "011004": {
    "text": "a person picks something up in front of them moves it to the side then moves it back",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.42346042675655904,
      "Minus Multimodal Distance": -4.30111837387085,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.998213494720403e-05,
      "MoBERT-F": 0.32602698753448034,
      "MoBERT-N": 0.398316510077508,
      "MoBERT-min(F/N)": 0.32602698753448034,
      "MoBERT-max(F/N)": 0.398316510077508,
      "MotionCritic": -4.951144695281982,
      "VeMo (human-opt view)": 0.7546468401486989,
      "VeMo (max entropy view)": 0.7546468401486989,
      "VeMo (min entropy view)": 0.7659574468085106,
      "VeMo (random view)": 0.7659574468085106,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks something up in front of them, moves it to the side, then moves it back."
  },
  "012941": {
    "text": "person stands still with arms bent forwards, as if gripping some handlebars.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.38986120830419907,
      "Minus Multimodal Distance": -6.035057067871094,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.891422238666564e-05,
      "MoBERT-F": 0.3298181460802599,
      "MoBERT-N": 0.43921818058120043,
      "MoBERT-min(F/N)": 0.3298181460802599,
      "MoBERT-max(F/N)": 0.43921818058120043,
      "MotionCritic": -5.33803129196167,
      "VeMo (human-opt view)": 0.9047013977128335,
      "VeMo (max entropy view)": 0.7422680412371134,
      "VeMo (min entropy view)": 0.9047013977128335,
      "VeMo (random view)": 0.7422680412371134,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands still with arms bent forward, as if they are gripping some handlebars."
  },
  "000710": {
    "text": "a person walks forward while twisting their torso side to side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.49582149084923066,
      "Minus Multimodal Distance": -11.882819175720215,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.6799319786950946e-05,
      "MoBERT-F": 0.3242551749230488,
      "MoBERT-N": 0.531365678654375,
      "MoBERT-min(F/N)": 0.3242551749230488,
      "MoBERT-max(F/N)": 0.531365678654375,
      "MotionCritic": -7.486203670501709,
      "VeMo (human-opt view)": 0.22237017310252996,
      "VeMo (max entropy view)": 0.2693498452012384,
      "VeMo (min entropy view)": 0.22237017310252996,
      "VeMo (random view)": 0.22237017310252996,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward while twisting their torso from side to side."
  },
  "013343": {
    "text": "he walked over a high object which was in his way.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5693689777835413,
      "Minus Multimodal Distance": -8.848485946655273,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7345213311491534e-05,
      "MoBERT-F": 0.38523405507097974,
      "MoBERT-N": 0.4905684486281882,
      "MoBERT-min(F/N)": 0.38523405507097974,
      "MoBERT-max(F/N)": 0.4905684486281882,
      "MotionCritic": -3.350663661956787,
      "VeMo (human-opt view)": 0.010309278350515464,
      "VeMo (max entropy view)": 0.010309278350515464,
      "VeMo (min entropy view)": 0.006290991835095704,
      "VeMo (random view)": 0.006290991835095704,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He walked over a high object which was in his way."
  },
  "002741": {
    "text": "the man performed a tennis smash that won the match.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0126064182823735,
      "Minus Multimodal Distance": -12.066904067993164,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9490067958831787,
      "MoBERT-F": 0.7533089463902299,
      "MoBERT-N": 0.6778795309079099,
      "MoBERT-min(F/N)": 0.6778795309079099,
      "MoBERT-max(F/N)": 0.7533089463902299,
      "MotionCritic": -6.102039813995361,
      "VeMo (human-opt view)": 0.21182266009852216,
      "VeMo (max entropy view)": 0.21182266009852216,
      "VeMo (min entropy view)": 0.16443189837685251,
      "VeMo (random view)": 0.21182266009852216,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man performed a tennis smash that won the match."
  },
  "009140": {
    "text": "a figure lunges backwards in a flexed pose.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0381286412521338,
      "Minus Multimodal Distance": -5.618651390075684,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.48821261525154114,
      "MoBERT-F": 0.5042526120813693,
      "MoBERT-N": 0.5521106037806656,
      "MoBERT-min(F/N)": 0.5042526120813693,
      "MoBERT-max(F/N)": 0.5521106037806656,
      "MotionCritic": -7.9927473068237305,
      "VeMo (human-opt view)": 0.0008045878627064966,
      "VeMo (max entropy view)": 0.004311514457141135,
      "VeMo (min entropy view)": 0.0008045878627064966,
      "VeMo (random view)": 0.004311514457141135,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure lunges backward in a flexed pose."
  },
  "003195": {
    "text": "a person is standing and steps backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.24089749483435585,
      "Minus Multimodal Distance": -4.415448188781738,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5489084124565125,
      "MoBERT-F": 0.672648614014989,
      "MoBERT-N": 0.5912209249515104,
      "MoBERT-min(F/N)": 0.5912209249515104,
      "MoBERT-max(F/N)": 0.672648614014989,
      "MotionCritic": -5.350775241851807,
      "VeMo (human-opt view)": 0.8436724565756824,
      "VeMo (max entropy view)": 0.8436724565756824,
      "VeMo (min entropy view)": 0.9193934557063048,
      "VeMo (random view)": 0.8436724565756824,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing and steps backward."
  },
  "000063": {
    "text": "a person is drying their right arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6618549398158973,
      "Minus Multimodal Distance": -4.794003009796143,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.267415715730749e-05,
      "MoBERT-F": 0.3067002564371779,
      "MoBERT-N": 0.3707776016548765,
      "MoBERT-min(F/N)": 0.3067002564371779,
      "MoBERT-max(F/N)": 0.3707776016548765,
      "MotionCritic": -9.979805946350098,
      "VeMo (human-opt view)": 0.24537607891491986,
      "VeMo (max entropy view)": 0.6647807637906648,
      "VeMo (min entropy view)": 0.24537607891491986,
      "VeMo (random view)": 0.6647807637906648,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is drying their right arm."
  },
  "007818": {
    "text": "a person wipes with their right hand .",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3168507696125189,
      "Minus Multimodal Distance": -11.332183837890625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.893182995147072e-05,
      "MoBERT-F": 0.33422273760457755,
      "MoBERT-N": 0.36269691298966594,
      "MoBERT-min(F/N)": 0.33422273760457755,
      "MoBERT-max(F/N)": 0.36269691298966594,
      "MotionCritic": -4.501307010650635,
      "VeMo (human-opt view)": 0.9243840271877655,
      "VeMo (max entropy view)": 0.8932228181374939,
      "VeMo (min entropy view)": 0.9243840271877655,
      "VeMo (random view)": 0.8932228181374939,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person wipes with their right hand."
  },
  "002332": {
    "text": "a person walks straight forward quickly",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.47870472584894325,
      "Minus Multimodal Distance": -3.754873275756836,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8586666303453967e-05,
      "MoBERT-F": 0.39596339981350803,
      "MoBERT-N": 0.5064331064382905,
      "MoBERT-min(F/N)": 0.39596339981350803,
      "MoBERT-max(F/N)": 0.5064331064382905,
      "MotionCritic": -5.778651237487793,
      "VeMo (human-opt view)": 0.7669616519174042,
      "VeMo (max entropy view)": 0.7660910518053375,
      "VeMo (min entropy view)": 0.7669616519174042,
      "VeMo (random view)": 0.7660910518053375,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks straight forward quickly."
  },
  "008235": {
    "text": "a person jumps into the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7127908995898388,
      "Minus Multimodal Distance": -11.278005599975586,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8301423788070679,
      "MoBERT-F": 0.7473651204935072,
      "MoBERT-N": 0.7127060029069489,
      "MoBERT-min(F/N)": 0.7127060029069489,
      "MoBERT-max(F/N)": 0.7473651204935072,
      "MotionCritic": -7.867926120758057,
      "VeMo (human-opt view)": 0.014070839398350316,
      "VeMo (max entropy view)": 0.014070839398350316,
      "VeMo (min entropy view)": 0.0005195132318445868,
      "VeMo (random view)": 0.014070839398350316,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps into the air."
  },
  "003942": {
    "text": "walking around in a circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4736799001150345,
      "Minus Multimodal Distance": -5.086642265319824,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8198732137680054,
      "MoBERT-F": 0.7880659837249433,
      "MoBERT-N": 0.7370957325260303,
      "MoBERT-min(F/N)": 0.7370957325260303,
      "MoBERT-max(F/N)": 0.7880659837249433,
      "MotionCritic": -13.21541976928711,
      "VeMo (human-opt view)": 0.718801996672213,
      "VeMo (max entropy view)": 0.718801996672213,
      "VeMo (min entropy view)": 0.7883211678832117,
      "VeMo (random view)": 0.7883211678832117,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking around in a circle."
  },
  "002789": {
    "text": "a man walks slowly forwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.249285846619149,
      "Minus Multimodal Distance": -12.6211519241333,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4475135433021933e-05,
      "MoBERT-F": 0.35498652499962363,
      "MoBERT-N": 0.46371413822256485,
      "MoBERT-min(F/N)": 0.35498652499962363,
      "MoBERT-max(F/N)": 0.46371413822256485,
      "MotionCritic": -4.880799770355225,
      "VeMo (human-opt view)": 0.9724658372425046,
      "VeMo (max entropy view)": 0.9603384452670545,
      "VeMo (min entropy view)": 0.9724658372425046,
      "VeMo (random view)": 0.9603384452670545,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks slowly forward."
  },
  "007995": {
    "text": "movin  right hand upward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.25567532695507067,
      "Minus Multimodal Distance": -6.811886310577393,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00010804059274960309,
      "MoBERT-F": 0.2689305750437163,
      "MoBERT-N": 0.30543444606023795,
      "MoBERT-min(F/N)": 0.2689305750437163,
      "MoBERT-max(F/N)": 0.30543444606023795,
      "MotionCritic": -11.6018648147583,
      "VeMo (human-opt view)": 0.08033613445378152,
      "VeMo (max entropy view)": 0.3922413793103448,
      "VeMo (min entropy view)": 0.08033613445378152,
      "VeMo (random view)": 0.3922413793103448,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "a person moving right hand upward"
  },
  "006118": {
    "text": "standing on one foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.43937456417511456,
      "Minus Multimodal Distance": -5.086575984954834,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.235022399574518e-05,
      "MoBERT-F": 0.33875297490505396,
      "MoBERT-N": 0.4456627187340217,
      "MoBERT-min(F/N)": 0.33875297490505396,
      "MoBERT-max(F/N)": 0.4456627187340217,
      "MotionCritic": -4.9774394035339355,
      "VeMo (human-opt view)": 0.00589314287944425,
      "VeMo (max entropy view)": 0.07589285714285714,
      "VeMo (min entropy view)": 0.00589314287944425,
      "VeMo (random view)": 0.07589285714285714,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing on one foot."
  },
  "004235": {
    "text": "a person steps forward and reaches out with their right hand to pick up an object that is at their waist level. they raise it to their mouth and take a sip, then put the object down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4483619464829462,
      "Minus Multimodal Distance": -10.452778816223145,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.507766657799948e-05,
      "MoBERT-F": 0.30227914701233666,
      "MoBERT-N": 0.4779549412610691,
      "MoBERT-min(F/N)": 0.30227914701233666,
      "MoBERT-max(F/N)": 0.4779549412610691,
      "MotionCritic": -5.609004497528076,
      "VeMo (human-opt view)": 1.5687160641925473e-05,
      "VeMo (max entropy view)": 1.5687160641925473e-05,
      "VeMo (min entropy view)": 9.232939980933494e-06,
      "VeMo (random view)": 1.5687160641925473e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps forward, reaches out with their right hand to pick up an object at waist level. They raise it to their mouth, take a sip, then put the object down."
  },
  "013130": {
    "text": "a person runs forward with one leg crossing in front of the other repetitively before coming to a stop.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5133092256733807,
      "Minus Multimodal Distance": -4.248985290527344,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.02973560057580471,
      "MoBERT-F": 0.6030371977337203,
      "MoBERT-N": 0.6082728417562782,
      "MoBERT-min(F/N)": 0.6030371977337203,
      "MoBERT-max(F/N)": 0.6082728417562782,
      "MotionCritic": -7.61852502822876,
      "VeMo (human-opt view)": 0.9399545602077247,
      "VeMo (max entropy view)": 0.8871989860583016,
      "VeMo (min entropy view)": 0.9399545602077247,
      "VeMo (random view)": 0.8871989860583016,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs forward with one leg crossing in front of the other repetitively before coming to a stop."
  },
  "000348": {
    "text": "a person kicks their right leg up twice while hopping between feet, then punches their right arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2310464776441659,
      "Minus Multimodal Distance": -9.327899932861328,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9181668162345886,
      "MoBERT-F": 0.7745795974731442,
      "MoBERT-N": 0.641254202813998,
      "MoBERT-min(F/N)": 0.641254202813998,
      "MoBERT-max(F/N)": 0.7745795974731442,
      "MotionCritic": -1.4998530149459839,
      "VeMo (human-opt view)": 0.852233676975945,
      "VeMo (max entropy view)": 0.852233676975945,
      "VeMo (min entropy view)": 0.8866666666666667,
      "VeMo (random view)": 0.8866666666666667,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person kicks their right leg up twice while hopping from one foot to the other, then punches with their right arm."
  },
  "010145": {
    "text": "a person is pushed hard to their left and they recover into a standing position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3562225624303003,
      "Minus Multimodal Distance": -9.601540565490723,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4630255211377516e-05,
      "MoBERT-F": 0.30657412326966027,
      "MoBERT-N": 0.423267378918782,
      "MoBERT-min(F/N)": 0.30657412326966027,
      "MoBERT-max(F/N)": 0.423267378918782,
      "MotionCritic": -2.0161967277526855,
      "VeMo (human-opt view)": 0.7318007662835249,
      "VeMo (max entropy view)": 0.6787003610108303,
      "VeMo (min entropy view)": 0.7318007662835249,
      "VeMo (random view)": 0.6787003610108303,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is pushed hard to their left and then recovers to a standing position."
  },
  "013558": {
    "text": "a person is pulling something and then walks and puts their arms out",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9055375797443369,
      "Minus Multimodal Distance": -7.573144435882568,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4990384190459736e-05,
      "MoBERT-F": 0.3481226918025363,
      "MoBERT-N": 0.4519000918777775,
      "MoBERT-min(F/N)": 0.3481226918025363,
      "MoBERT-max(F/N)": 0.4519000918777775,
      "MotionCritic": -8.115644454956055,
      "VeMo (human-opt view)": 0.7544783983140148,
      "VeMo (max entropy view)": 0.6932952924393724,
      "VeMo (min entropy view)": 0.7544783983140148,
      "VeMo (random view)": 0.7544783983140148,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is pulling something and then walking and putting their arms out."
  },
  "009977": {
    "text": "person swings right arm as if pitching a ball to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5000867529282843,
      "Minus Multimodal Distance": -7.573144435882568,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00014847758575342596,
      "MoBERT-F": 0.5016604117678699,
      "MoBERT-N": 0.5015302458732268,
      "MoBERT-min(F/N)": 0.5015302458732268,
      "MoBERT-max(F/N)": 0.5016604117678699,
      "MotionCritic": -8.225347518920898,
      "VeMo (human-opt view)": 0.7984570877531341,
      "VeMo (max entropy view)": 0.7770582793709528,
      "VeMo (min entropy view)": 0.7984570877531341,
      "VeMo (random view)": 0.7770582793709528,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person swings their right arm as if pitching a ball to the left."
  },
  "012791": {
    "text": "a person standing raises his right hand to talk on the phone.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2654142557770491,
      "Minus Multimodal Distance": -6.86422061920166,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.171396722085774e-05,
      "MoBERT-F": 0.2796731453324697,
      "MoBERT-N": 0.3761151583357027,
      "MoBERT-min(F/N)": 0.2796731453324697,
      "MoBERT-max(F/N)": 0.3761151583357027,
      "MotionCritic": -8.46349048614502,
      "VeMo (human-opt view)": 0.2011747430249633,
      "VeMo (max entropy view)": 0.2011747430249633,
      "VeMo (min entropy view)": 0.004903330708061279,
      "VeMo (random view)": 0.2011747430249633,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing raises his right hand to talk on the phone."
  },
  "006853": {
    "text": "sitting down and crossing legs then sitting on the floor.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8810815407532803,
      "Minus Multimodal Distance": -11.396255493164062,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9837467074394226,
      "MoBERT-F": 0.5558715617069918,
      "MoBERT-N": 0.559141832496699,
      "MoBERT-min(F/N)": 0.5558715617069918,
      "MoBERT-max(F/N)": 0.559141832496699,
      "MotionCritic": -9.105257034301758,
      "VeMo (human-opt view)": 0.9466666666666667,
      "VeMo (max entropy view)": 0.7879924953095685,
      "VeMo (min entropy view)": 0.9466666666666667,
      "VeMo (random view)": 0.9466666666666667,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is sitting down, crossing their legs, and then sitting on the floor."
  },
  "006420": {
    "text": "a figure spins rapidly with arms outstretched then adjusts sleeves",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1762485758293586,
      "Minus Multimodal Distance": -4.475533485412598,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.993573009967804,
      "MoBERT-F": 0.7538468450976854,
      "MoBERT-N": 0.7282341814948808,
      "MoBERT-min(F/N)": 0.7282341814948808,
      "MoBERT-max(F/N)": 0.7538468450976854,
      "MotionCritic": -6.510036945343018,
      "VeMo (human-opt view)": 0.007580978635423846,
      "VeMo (max entropy view)": 0.007580978635423846,
      "VeMo (min entropy view)": 0.004331300274021037,
      "VeMo (random view)": 0.004331300274021037,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure spins rapidly with arms outstretched and then adjusts its sleeves."
  },
  "004991": {
    "text": "a person flaps their arms like a chicken.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.43569451121926894,
      "Minus Multimodal Distance": -12.667380332946777,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.989342803834006e-05,
      "MoBERT-F": 0.36106162839680067,
      "MoBERT-N": 0.43802265709346255,
      "MoBERT-min(F/N)": 0.36106162839680067,
      "MoBERT-max(F/N)": 0.43802265709346255,
      "MotionCritic": -6.053378582000732,
      "VeMo (human-opt view)": 0.43700787401574803,
      "VeMo (max entropy view)": 0.43700787401574803,
      "VeMo (min entropy view)": 0.718801996672213,
      "VeMo (random view)": 0.718801996672213,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person flaps their arms like a chicken."
  },
  "010828": {
    "text": "a person walks forward and stumbles a bit",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.509120285375763,
      "Minus Multimodal Distance": -7.5613932609558105,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.412174035271164e-05,
      "MoBERT-F": 0.3570451674788395,
      "MoBERT-N": 0.4800111382281872,
      "MoBERT-min(F/N)": 0.3570451674788395,
      "MoBERT-max(F/N)": 0.4800111382281872,
      "MotionCritic": -4.692965507507324,
      "VeMo (human-opt view)": 0.9951050964583933,
      "VeMo (max entropy view)": 0.9914517986113534,
      "VeMo (min entropy view)": 0.9951050964583933,
      "VeMo (random view)": 0.9951050964583933,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and stumbles a little."
  },
  "002550": {
    "text": "a person walks forward in an askew line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.41924045803782284,
      "Minus Multimodal Distance": -2.7530324459075928,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.42331150022801e-05,
      "MoBERT-F": 0.463148554495975,
      "MoBERT-N": 0.5682309066669353,
      "MoBERT-min(F/N)": 0.463148554495975,
      "MoBERT-max(F/N)": 0.5682309066669353,
      "MotionCritic": -2.8818883895874023,
      "VeMo (human-opt view)": 0.9198012775017743,
      "VeMo (max entropy view)": 0.9099041533546326,
      "VeMo (min entropy view)": 0.9198012775017743,
      "VeMo (random view)": 0.9099041533546326,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward in an askew line."
  },
  "012675": {
    "text": "person walks with both arms straight forward",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0058211366319292,
      "Minus Multimodal Distance": -6.565577983856201,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.227054523653351e-05,
      "MoBERT-F": 0.494038230502157,
      "MoBERT-N": 0.5206834604489069,
      "MoBERT-min(F/N)": 0.494038230502157,
      "MoBERT-max(F/N)": 0.5206834604489069,
      "MotionCritic": -5.705596446990967,
      "VeMo (human-opt view)": 0.7055837563451777,
      "VeMo (max entropy view)": 0.42196531791907516,
      "VeMo (min entropy view)": 0.7055837563451777,
      "VeMo (random view)": 0.7055837563451777,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks with both arms stretched straight forward."
  },
  "012741": {
    "text": "the man walks and turns right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6652545125766536,
      "Minus Multimodal Distance": -5.177229881286621,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.189572013681754e-05,
      "MoBERT-F": 0.4819126394914045,
      "MoBERT-N": 0.5846498454298932,
      "MoBERT-min(F/N)": 0.4819126394914045,
      "MoBERT-max(F/N)": 0.5846498454298932,
      "MotionCritic": -3.612666606903076,
      "VeMo (human-opt view)": 0.9553113553113554,
      "VeMo (max entropy view)": 0.9553113553113554,
      "VeMo (min entropy view)": 0.9687620516775935,
      "VeMo (random view)": 0.9687620516775935,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man walks and turns right."
  },
  "010248": {
    "text": "a person climbs up some ladders",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.168209676812284,
      "Minus Multimodal Distance": -5.618626117706299,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9330851435661316,
      "MoBERT-F": 0.8299821766463383,
      "MoBERT-N": 0.6874627416756518,
      "MoBERT-min(F/N)": 0.6874627416756518,
      "MoBERT-max(F/N)": 0.8299821766463383,
      "MotionCritic": -8.418265342712402,
      "VeMo (human-opt view)": 4.768348844772125e-06,
      "VeMo (max entropy view)": 4.768348844772125e-06,
      "VeMo (min entropy view)": 3.832343214203208e-06,
      "VeMo (random view)": 3.832343214203208e-06,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person climbs up some ladders."
  },
  "007941": {
    "text": "moving the left hand on stand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.15313837828652038,
      "Minus Multimodal Distance": -7.0668134689331055,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8117781766923144e-05,
      "MoBERT-F": 0.4186055897694031,
      "MoBERT-N": 0.47955265732637314,
      "MoBERT-min(F/N)": 0.4186055897694031,
      "MoBERT-max(F/N)": 0.47955265732637314,
      "MotionCritic": -9.8412504196167,
      "VeMo (human-opt view)": 0.9398058252427185,
      "VeMo (max entropy view)": 0.9240801117838845,
      "VeMo (min entropy view)": 0.9398058252427185,
      "VeMo (random view)": 0.9240801117838845,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving the left hand while standing."
  },
  "001014": {
    "text": "person has arms crossing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.18995535134269567,
      "Minus Multimodal Distance": -7.086043834686279,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.318646046565846e-05,
      "MoBERT-F": 0.320281041039055,
      "MoBERT-N": 0.41244585275408874,
      "MoBERT-min(F/N)": 0.320281041039055,
      "MoBERT-max(F/N)": 0.41244585275408874,
      "MotionCritic": -1.4797275066375732,
      "VeMo (human-opt view)": 0.9924321115892566,
      "VeMo (max entropy view)": 0.8595988538681948,
      "VeMo (min entropy view)": 0.9924321115892566,
      "VeMo (random view)": 0.8595988538681948,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person has their arms crossed."
  },
  "005851": {
    "text": "a person, standing, raises his right hand as if to check his watch and then lowers his hand back to his side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.20849044213068083,
      "Minus Multimodal Distance": -10.953105926513672,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.806457243626937e-05,
      "MoBERT-F": 0.2679336645991258,
      "MoBERT-N": 0.3928199095479344,
      "MoBERT-min(F/N)": 0.2679336645991258,
      "MoBERT-max(F/N)": 0.3928199095479344,
      "MotionCritic": -4.70436429977417,
      "VeMo (human-opt view)": 0.9784011220196354,
      "VeMo (max entropy view)": 0.8740894901144641,
      "VeMo (min entropy view)": 0.9784011220196354,
      "VeMo (random view)": 0.9784011220196354,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person, while standing, raises his right hand as if to check his watch and then lowers his hand back to his side."
  },
  "008567": {
    "text": "a person shifts their weight from side to side, first settling back on their left foot, then their right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5641261948316502,
      "Minus Multimodal Distance": -1.9175962209701538,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.07631094008684158,
      "MoBERT-F": 0.6089691805997943,
      "MoBERT-N": 0.6309016227504336,
      "MoBERT-min(F/N)": 0.6089691805997943,
      "MoBERT-max(F/N)": 0.6309016227504336,
      "MotionCritic": -6.636837482452393,
      "VeMo (human-opt view)": 0.928537170263789,
      "VeMo (max entropy view)": 0.9149736644093304,
      "VeMo (min entropy view)": 0.928537170263789,
      "VeMo (random view)": 0.928537170263789,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person shifts their weight from side to side, first settling back on their left foot, then their right."
  },
  "000781": {
    "text": "a man raises left foot knee high then swings out and puts down repaets this motion twice",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5533234499901458,
      "Minus Multimodal Distance": -10.010184288024902,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00019560866348911077,
      "MoBERT-F": 0.5035792994297448,
      "MoBERT-N": 0.5413000023202883,
      "MoBERT-min(F/N)": 0.5035792994297448,
      "MoBERT-max(F/N)": 0.5413000023202883,
      "MotionCritic": -6.0309624671936035,
      "VeMo (human-opt view)": 0.9399848062800709,
      "VeMo (max entropy view)": 0.9399848062800709,
      "VeMo (min entropy view)": 0.9525048796356539,
      "VeMo (random view)": 0.9525048796356539,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man raises his left foot knee-high, then swings it out and puts it down. He repeats this motion twice."
  },
  "004997": {
    "text": "a person holds their head with both hands then bats away something with their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.38280735035292357,
      "Minus Multimodal Distance": -7.689002513885498,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.165199450450018e-05,
      "MoBERT-F": 0.30286572801200984,
      "MoBERT-N": 0.3537124566715723,
      "MoBERT-min(F/N)": 0.30286572801200984,
      "MoBERT-max(F/N)": 0.3537124566715723,
      "MotionCritic": 0.3432306945323944,
      "VeMo (human-opt view)": 0.03741029164757978,
      "VeMo (max entropy view)": 0.4838709677419355,
      "VeMo (min entropy view)": 0.03741029164757978,
      "VeMo (random view)": 0.03741029164757978,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person holds their head with both hands, then bats away something with their right hand."
  },
  "009351": {
    "text": "a person squats to almost parallel then jumps to the horizontally to the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.41359764565291174,
      "Minus Multimodal Distance": -7.08700704574585,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.07919679582118988,
      "MoBERT-F": 0.6330906332469791,
      "MoBERT-N": 0.6856449665367892,
      "MoBERT-min(F/N)": 0.6330906332469791,
      "MoBERT-max(F/N)": 0.6856449665367892,
      "MotionCritic": -12.262240409851074,
      "VeMo (human-opt view)": 0.7665647298674821,
      "VeMo (max entropy view)": 0.6372980910425844,
      "VeMo (min entropy view)": 0.7665647298674821,
      "VeMo (random view)": 0.6372980910425844,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person squats almost to a parallel position and then jumps horizontally to the left."
  },
  "000523": {
    "text": "a person repeatedly lunges forward and lifts their right armin front of their face.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6377835200920046,
      "Minus Multimodal Distance": -11.867810249328613,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.562633694149554e-05,
      "MoBERT-F": 0.2626601169065627,
      "MoBERT-N": 0.41139701542764273,
      "MoBERT-min(F/N)": 0.2626601169065627,
      "MoBERT-max(F/N)": 0.41139701542764273,
      "MotionCritic": -5.807403564453125,
      "VeMo (human-opt view)": 0.8442437923250564,
      "VeMo (max entropy view)": 0.7549407114624506,
      "VeMo (min entropy view)": 0.8442437923250564,
      "VeMo (random view)": 0.8442437923250564,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person repeatedly lunges forward and lifts their right arm in front of their face."
  },
  "007283": {
    "text": "a person lifts up their left leg while shifting on their right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7126605459278021,
      "Minus Multimodal Distance": -7.598386287689209,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.05249015986919403,
      "MoBERT-F": 0.6247809796510096,
      "MoBERT-N": 0.5790682593095456,
      "MoBERT-min(F/N)": 0.5790682593095456,
      "MoBERT-max(F/N)": 0.6247809796510096,
      "MotionCritic": -6.771753311157227,
      "VeMo (human-opt view)": 0.9688206055128784,
      "VeMo (max entropy view)": 0.949685534591195,
      "VeMo (min entropy view)": 0.9688206055128784,
      "VeMo (random view)": 0.9688206055128784,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lifts their left leg while shifting their weight onto their right."
  },
  "014169": {
    "text": "it looks like the person is taking care of his shoes",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8717953481604152,
      "Minus Multimodal Distance": -8.357415199279785,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.053401087527163e-05,
      "MoBERT-F": 0.3258874710389956,
      "MoBERT-N": 0.4166903611350307,
      "MoBERT-min(F/N)": 0.3258874710389956,
      "MoBERT-max(F/N)": 0.4166903611350307,
      "MotionCritic": -3.433530569076538,
      "VeMo (human-opt view)": 0.00016314274253989943,
      "VeMo (max entropy view)": 0.00043167121837574485,
      "VeMo (min entropy view)": 0.00016314274253989943,
      "VeMo (random view)": 0.00016314274253989943,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "It looks like the person is taking care of his shoes."
  },
  "007676": {
    "text": "a person in a defensive pose leans right then left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7541275394899879,
      "Minus Multimodal Distance": -10.115007400512695,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5220126190106384e-05,
      "MoBERT-F": 0.390551171360972,
      "MoBERT-N": 0.5181891462962571,
      "MoBERT-min(F/N)": 0.390551171360972,
      "MoBERT-max(F/N)": 0.5181891462962571,
      "MotionCritic": 1.9173861742019653,
      "VeMo (human-opt view)": 0.9196956889264581,
      "VeMo (max entropy view)": 0.9152,
      "VeMo (min entropy view)": 0.9196956889264581,
      "VeMo (random view)": 0.9152,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person in a defensive pose leans to the right and then to the left."
  },
  "007822": {
    "text": "a person jumps from side to side left to right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8433006095892218,
      "Minus Multimodal Distance": -5.054903507232666,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9050763845443726,
      "MoBERT-F": 0.7372666110015889,
      "MoBERT-N": 0.7280685920518055,
      "MoBERT-min(F/N)": 0.7280685920518055,
      "MoBERT-max(F/N)": 0.7372666110015889,
      "MotionCritic": -5.987520217895508,
      "VeMo (human-opt view)": 0.8521303258145363,
      "VeMo (max entropy view)": 0.8352668213457076,
      "VeMo (min entropy view)": 0.8521303258145363,
      "VeMo (random view)": 0.8521303258145363,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps from side to side, left to right."
  },
  "000019": {
    "text": "person jogs around to the left and right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9727951664458925,
      "Minus Multimodal Distance": -10.240713119506836,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.04134947806596756,
      "MoBERT-F": 0.616391430330465,
      "MoBERT-N": 0.6684581910524379,
      "MoBERT-min(F/N)": 0.616391430330465,
      "MoBERT-max(F/N)": 0.6684581910524379,
      "MotionCritic": -10.823291778564453,
      "VeMo (human-opt view)": 0.8598781549173194,
      "VeMo (max entropy view)": 0.8598781549173194,
      "VeMo (min entropy view)": 0.9152,
      "VeMo (random view)": 0.9152,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jogs from side to side."
  },
  "006759": {
    "text": "a figure steps backward slowly and carefully",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3637352030472078,
      "Minus Multimodal Distance": -6.704823970794678,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9845253825187683,
      "MoBERT-F": 0.6647568265934143,
      "MoBERT-N": 0.5665841240617056,
      "MoBERT-min(F/N)": 0.5665841240617056,
      "MoBERT-max(F/N)": 0.6647568265934143,
      "MotionCritic": -7.27539587020874,
      "VeMo (human-opt view)": 0.638121546961326,
      "VeMo (max entropy view)": 0.638121546961326,
      "VeMo (min entropy view)": 0.8264984227129337,
      "VeMo (random view)": 0.638121546961326,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure steps backward slowly and carefully."
  },
  "006117": {
    "text": "someone being pushed back a few steps",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.23325611293262186,
      "Minus Multimodal Distance": -7.4250054359436035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9898125529289246,
      "MoBERT-F": 0.7349299068126827,
      "MoBERT-N": 0.6157237009471065,
      "MoBERT-min(F/N)": 0.6157237009471065,
      "MoBERT-max(F/N)": 0.7349299068126827,
      "MotionCritic": -3.497446060180664,
      "VeMo (human-opt view)": 0.43738656987295826,
      "VeMo (max entropy view)": 0.4688427299703264,
      "VeMo (min entropy view)": 0.43738656987295826,
      "VeMo (random view)": 0.43738656987295826,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is pushed back a few steps."
  },
  "005139": {
    "text": "a man squats and walks forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.766435544489481,
      "Minus Multimodal Distance": -4.144680500030518,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.036971740424633026,
      "MoBERT-F": 0.5830192681716628,
      "MoBERT-N": 0.4820325846005847,
      "MoBERT-min(F/N)": 0.4820325846005847,
      "MoBERT-max(F/N)": 0.5830192681716628,
      "MotionCritic": -6.32343864440918,
      "VeMo (human-opt view)": 0.9983015347119083,
      "VeMo (max entropy view)": 0.9947933863307342,
      "VeMo (min entropy view)": 0.9983015347119083,
      "VeMo (random view)": 0.9983015347119083,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man squats and walks forward."
  },
  "012240": {
    "text": "a person bends down to their left side, then takes their left hand and places it on their right arm, and finally bends back down to the left with both hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.49996851417673116,
      "Minus Multimodal Distance": -6.82237434387207,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.196436955477111e-05,
      "MoBERT-F": 0.26511074528462186,
      "MoBERT-N": 0.4047229751978184,
      "MoBERT-min(F/N)": 0.26511074528462186,
      "MoBERT-max(F/N)": 0.4047229751978184,
      "MotionCritic": -10.70650863647461,
      "VeMo (human-opt view)": 0.7188703465982028,
      "VeMo (max entropy view)": 0.6215469613259669,
      "VeMo (min entropy view)": 0.7188703465982028,
      "VeMo (random view)": 0.6215469613259669,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends down to their left side, then places their left hand on their right arm, and finally bends down to the left again with both hands."
  },
  "006123": {
    "text": "a person uses their right hands to move their right foot to rest against the last leg in a yoga pose, then repeats the same motion with the left leg.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5788269493859595,
      "Minus Multimodal Distance": -2.5605642795562744,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.04828083515167236,
      "MoBERT-F": 0.6623092580446213,
      "MoBERT-N": 0.56377164425845,
      "MoBERT-min(F/N)": 0.56377164425845,
      "MoBERT-max(F/N)": 0.6623092580446213,
      "MotionCritic": -5.382069110870361,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.37786259541984735,
      "VeMo (random view)": 0.37786259541984735,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person uses their right hand to move their right foot to rest against the left leg in a yoga pose, then repeats the same motion with the left leg."
  },
  "008803": {
    "text": "a person holds their arms out to each side, teetering them up and down slightly. then, they begin to rotate their arms in slow, wide circles,",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2162728006196128,
      "Minus Multimodal Distance": -9.929628372192383,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00011027921573258936,
      "MoBERT-F": 0.5037136332660397,
      "MoBERT-N": 0.4263671785709799,
      "MoBERT-min(F/N)": 0.4263671785709799,
      "MoBERT-max(F/N)": 0.5037136332660397,
      "MotionCritic": -3.8505115509033203,
      "VeMo (human-opt view)": 0.608955223880597,
      "VeMo (max entropy view)": 0.608955223880597,
      "VeMo (min entropy view)": 0.6779661016949152,
      "VeMo (random view)": 0.6779661016949152,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person holds their arms out to each side, teetering them up and down slightly. Then, they begin to rotate their arms in slow, wide circles."
  },
  "014499": {
    "text": "a person brings his arms which were in the air along his body. his knees appear to be bent.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7479367947376999,
      "Minus Multimodal Distance": -9.176878929138184,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7425490770838223e-05,
      "MoBERT-F": 0.33984312366047487,
      "MoBERT-N": 0.44814507918605406,
      "MoBERT-min(F/N)": 0.33984312366047487,
      "MoBERT-max(F/N)": 0.44814507918605406,
      "MotionCritic": -8.790952682495117,
      "VeMo (human-opt view)": 0.9706591070163005,
      "VeMo (max entropy view)": 0.9325899645210339,
      "VeMo (min entropy view)": 0.9706591070163005,
      "VeMo (random view)": 0.9706591070163005,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person brings his arms, which were in the air, down along his body. His knees appear to be bent."
  },
  "000306": {
    "text": "a person picks up two objects and begins performing a motioning action with both objects.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7260359536236726,
      "Minus Multimodal Distance": -10.5128812789917,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4638502509333193e-05,
      "MoBERT-F": 0.4182640790969945,
      "MoBERT-N": 0.46989870366710274,
      "MoBERT-min(F/N)": 0.4182640790969945,
      "MoBERT-max(F/N)": 0.46989870366710274,
      "MotionCritic": -2.376310110092163,
      "VeMo (human-opt view)": 0.00033512552636563654,
      "VeMo (max entropy view)": 0.00033512552636563654,
      "VeMo (min entropy view)": 0.0002454519400987567,
      "VeMo (random view)": 0.00033512552636563654,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks up two objects and begins performing a motioning action with both objects."
  },
  "010409": {
    "text": "the person moves backwards as if pushed by someone in front of them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4505968616403643,
      "Minus Multimodal Distance": -10.302921295166016,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9908158779144287,
      "MoBERT-F": 0.7496230776985856,
      "MoBERT-N": 0.6824307215627758,
      "MoBERT-min(F/N)": 0.6824307215627758,
      "MoBERT-max(F/N)": 0.7496230776985856,
      "MotionCritic": -2.3273377418518066,
      "VeMo (human-opt view)": 0.6788990825688074,
      "VeMo (max entropy view)": 0.6788990825688074,
      "VeMo (min entropy view)": 0.7987987987987988,
      "VeMo (random view)": 0.7987987987987988,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person moves backward as if pushed by someone in front of them."
  },
  "003954": {
    "text": "a person standing in one position, gently swinging both arms in back-and-forth motion",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.27811040723300817,
      "Minus Multimodal Distance": -7.69091272354126,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0003444643225520849,
      "MoBERT-F": 0.5701769418928053,
      "MoBERT-N": 0.555193052027079,
      "MoBERT-min(F/N)": 0.555193052027079,
      "MoBERT-max(F/N)": 0.5701769418928053,
      "MotionCritic": -9.778360366821289,
      "VeMo (human-opt view)": 0.8668730650154799,
      "VeMo (max entropy view)": 0.8668730650154799,
      "VeMo (min entropy view)": 0.9580386610089581,
      "VeMo (random view)": 0.8668730650154799,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands in one position, gently swinging both arms in a back-and-forth motion."
  },
  "009880": {
    "text": "a man beginning with his right leg slides to the right side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.31667248609679977,
      "Minus Multimodal Distance": -5.949052810668945,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9677103157155216e-05,
      "MoBERT-F": 0.30376744934282773,
      "MoBERT-N": 0.4420005609513007,
      "MoBERT-min(F/N)": 0.30376744934282773,
      "MoBERT-max(F/N)": 0.4420005609513007,
      "MotionCritic": -6.826096057891846,
      "VeMo (human-opt view)": 0.9048991354466859,
      "VeMo (max entropy view)": 0.8740894901144641,
      "VeMo (min entropy view)": 0.9048991354466859,
      "VeMo (random view)": 0.8740894901144641,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man begins with his right leg and slides to the right side."
  },
  "006967": {
    "text": "a person standing in a square, walks forward in a diagonal pattern.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4875200198816818,
      "Minus Multimodal Distance": -2.9948136806488037,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9460901618003845,
      "MoBERT-F": 0.7292890807512559,
      "MoBERT-N": 0.6714469542145552,
      "MoBERT-min(F/N)": 0.6714469542145552,
      "MoBERT-max(F/N)": 0.7292890807512559,
      "MotionCritic": -5.562899112701416,
      "VeMo (human-opt view)": 0.9101251422070534,
      "VeMo (max entropy view)": 0.9045362220717671,
      "VeMo (min entropy view)": 0.9101251422070534,
      "VeMo (random view)": 0.9045362220717671,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing in a square walks forward in a diagonal pattern."
  },
  "003763": {
    "text": "a person side steps to the right and then stands up straight.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3722712808526789,
      "Minus Multimodal Distance": -6.192883014678955,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0003653625026345253,
      "MoBERT-F": 0.5314581893839835,
      "MoBERT-N": 0.5975261111307139,
      "MoBERT-min(F/N)": 0.5314581893839835,
      "MoBERT-max(F/N)": 0.5975261111307139,
      "MotionCritic": -4.203762531280518,
      "VeMo (human-opt view)": 0.9819445621768246,
      "VeMo (max entropy view)": 0.9649309245483528,
      "VeMo (min entropy view)": 0.9819445621768246,
      "VeMo (random view)": 0.9649309245483528,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person side-steps to the right and then stands up straight."
  },
  "005300": {
    "text": "a person dribbles a basketball through their legs then runs quickly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0673693178783301,
      "Minus Multimodal Distance": -9.676719665527344,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.12997344136238098,
      "MoBERT-F": 0.5961877061014569,
      "MoBERT-N": 0.5875900988090111,
      "MoBERT-min(F/N)": 0.5875900988090111,
      "MoBERT-max(F/N)": 0.5961877061014569,
      "MotionCritic": -9.927677154541016,
      "VeMo (human-opt view)": 8.194498302984546e-05,
      "VeMo (max entropy view)": 8.194498302984546e-05,
      "VeMo (min entropy view)": 3.2186658326475954e-05,
      "VeMo (random view)": 8.194498302984546e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person dribbles a basketball through their legs and then runs quickly."
  },
  "010547": {
    "text": "the person is walking to the left like a robot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9975594722904947,
      "Minus Multimodal Distance": -11.960724830627441,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00016492856957484037,
      "MoBERT-F": 0.45363067732196194,
      "MoBERT-N": 0.5010439707210189,
      "MoBERT-min(F/N)": 0.45363067732196194,
      "MoBERT-max(F/N)": 0.5010439707210189,
      "MotionCritic": -6.14514684677124,
      "VeMo (human-opt view)": 0.7545304777594728,
      "VeMo (max entropy view)": 0.5633802816901409,
      "VeMo (min entropy view)": 0.7545304777594728,
      "VeMo (random view)": 0.7545304777594728,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking to the left like a robot."
  },
  "008905": {
    "text": "a person raises their left arm up to their face.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2618453897343407,
      "Minus Multimodal Distance": -9.631319999694824,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.743773777387105e-05,
      "MoBERT-F": 0.29869940042597176,
      "MoBERT-N": 0.41139561462201624,
      "MoBERT-min(F/N)": 0.29869940042597176,
      "MoBERT-max(F/N)": 0.41139561462201624,
      "MotionCritic": -14.03976821899414,
      "VeMo (human-opt view)": 0.9649063032367973,
      "VeMo (max entropy view)": 0.9649063032367973,
      "VeMo (min entropy view)": 0.9850268124521206,
      "VeMo (random view)": 0.9649063032367973,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises their left arm up to their face."
  },
  "010037": {
    "text": "a right handed golfer takes a golf swing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7903552091794258,
      "Minus Multimodal Distance": -7.567914962768555,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.0713439628016204e-05,
      "MoBERT-F": 0.5228436489316376,
      "MoBERT-N": 0.5743800548778768,
      "MoBERT-min(F/N)": 0.5228436489316376,
      "MoBERT-max(F/N)": 0.5743800548778768,
      "MotionCritic": -9.599989891052246,
      "VeMo (human-opt view)": 0.22285714285714286,
      "VeMo (max entropy view)": 0.22285714285714286,
      "VeMo (min entropy view)": 0.03520978283786633,
      "VeMo (random view)": 0.03520978283786633,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A right-handed golfer takes a golf swing."
  },
  "012595": {
    "text": "the person is walking straight backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1833458182272283,
      "Minus Multimodal Distance": -12.49690055847168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.961535632610321,
      "MoBERT-F": 0.7727293504671349,
      "MoBERT-N": 0.5938076839752295,
      "MoBERT-min(F/N)": 0.5938076839752295,
      "MoBERT-max(F/N)": 0.7727293504671349,
      "MotionCritic": -4.131337642669678,
      "VeMo (human-opt view)": 0.7058823529411765,
      "VeMo (max entropy view)": 0.7058823529411765,
      "VeMo (min entropy view)": 0.9323843416370107,
      "VeMo (random view)": 0.9323843416370107,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking straight backwards."
  },
  "000597": {
    "text": "the person is widewalking.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5242796535478953,
      "Minus Multimodal Distance": -6.494458198547363,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.832629540236667e-05,
      "MoBERT-F": 0.48307491045395456,
      "MoBERT-N": 0.5807157354236891,
      "MoBERT-min(F/N)": 0.48307491045395456,
      "MoBERT-max(F/N)": 0.5807157354236891,
      "MotionCritic": -4.6754679679870605,
      "VeMo (human-opt view)": 0.8519968676585747,
      "VeMo (max entropy view)": 0.7772657450076805,
      "VeMo (min entropy view)": 0.8519968676585747,
      "VeMo (random view)": 0.7772657450076805,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking widely."
  },
  "003083": {
    "text": "the person starts by sitting and dials with their right hand then holds the phone with their left and starts by turning in their chair, before standing up walking to the left around the back of the chair then sitting down again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8475993252176491,
      "Minus Multimodal Distance": -10.39461612701416,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.009456058964133263,
      "MoBERT-F": 0.34912699411829967,
      "MoBERT-N": 0.5124643862394349,
      "MoBERT-min(F/N)": 0.34912699411829967,
      "MoBERT-max(F/N)": 0.5124643862394349,
      "MotionCritic": -5.614564895629883,
      "VeMo (human-opt view)": 6.0140514972092574e-05,
      "VeMo (max entropy view)": 0.0004880429477794046,
      "VeMo (min entropy view)": 6.0140514972092574e-05,
      "VeMo (random view)": 0.0004880429477794046,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person starts by sitting and dials with their right hand. Then, they hold the phone with their left hand and turn in their chair before standing up, walking to the left around the back of the chair, and then sitting down again."
  },
  "011694": {
    "text": "a person jumps while spreading legs apart and swinging arms out and above the head, then back down again.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5673495389278013,
      "Minus Multimodal Distance": -3.131129264831543,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9929110407829285,
      "MoBERT-F": 0.856041568770762,
      "MoBERT-N": 0.7895666448783919,
      "MoBERT-min(F/N)": 0.7895666448783919,
      "MoBERT-max(F/N)": 0.856041568770762,
      "MotionCritic": -5.903090953826904,
      "VeMo (human-opt view)": 0.9497242945183263,
      "VeMo (max entropy view)": 0.9497242945183263,
      "VeMo (min entropy view)": 0.96045197740113,
      "VeMo (random view)": 0.96045197740113,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps while spreading their legs apart and swinging their arms out and above their head, then back down again."
  },
  "007598": {
    "text": "a person walks in a counter clockwise circle and turns around to look after possibly tripping over something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.834668263409325,
      "Minus Multimodal Distance": -2.065260648727417,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.26999717950820923,
      "MoBERT-F": 0.5518676552017786,
      "MoBERT-N": 0.576226990631696,
      "MoBERT-min(F/N)": 0.5518676552017786,
      "MoBERT-max(F/N)": 0.576226990631696,
      "MotionCritic": -13.781497955322266,
      "VeMo (human-opt view)": 0.5155440414507773,
      "VeMo (max entropy view)": 0.5155440414507773,
      "VeMo (min entropy view)": 0.5612244897959183,
      "VeMo (random view)": 0.5155440414507773,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in a counter - clockwise circle and turns around to look after possibly tripping over something."
  },
  "011136": {
    "text": "a person who is jumping around with one leg but alternating the legs every jump",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1984467017494027,
      "Minus Multimodal Distance": -3.9881293773651123,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9221517443656921,
      "MoBERT-F": 0.8148614894303451,
      "MoBERT-N": 0.7706304085655173,
      "MoBERT-min(F/N)": 0.7706304085655173,
      "MoBERT-max(F/N)": 0.8148614894303451,
      "MotionCritic": -8.30394172668457,
      "VeMo (human-opt view)": 0.00011616745887163389,
      "VeMo (max entropy view)": 0.00011616745887163389,
      "VeMo (min entropy view)": 7.742666556143897e-05,
      "VeMo (random view)": 7.742666556143897e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who is jumping around on one leg alternates legs every jump."
  },
  "003780": {
    "text": "a person clasps its arms together then goes back to standing position with arms hanging.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.29093823031490834,
      "Minus Multimodal Distance": -8.10576343536377,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.624070814112201e-05,
      "MoBERT-F": 0.33811446552482227,
      "MoBERT-N": 0.44576226449095013,
      "MoBERT-min(F/N)": 0.33811446552482227,
      "MoBERT-max(F/N)": 0.44576226449095013,
      "MotionCritic": -0.9768519997596741,
      "VeMo (human-opt view)": 0.9953896572793424,
      "VeMo (max entropy view)": 0.8740894901144641,
      "VeMo (min entropy view)": 0.9953896572793424,
      "VeMo (random view)": 0.9953896572793424,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person clasps their arms together, then goes back to the standing position with their arms hanging."
  },
  "000998": {
    "text": "while walking forward he appears to be pushed, he rights himself and continues walking.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5193925490360455,
      "Minus Multimodal Distance": -5.178846836090088,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0016054307343438268,
      "MoBERT-F": 0.49790573205565947,
      "MoBERT-N": 0.5709622526787284,
      "MoBERT-min(F/N)": 0.49790573205565947,
      "MoBERT-max(F/N)": 0.5709622526787284,
      "MotionCritic": -9.30407428741455,
      "VeMo (human-opt view)": 0.6372980910425844,
      "VeMo (max entropy view)": 0.6372980910425844,
      "VeMo (min entropy view)": 0.6509090909090909,
      "VeMo (random view)": 0.6509090909090909,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "While walking forward, he appears to be pushed. He rights himself and continues walking."
  },
  "006648": {
    "text": "the person fills the glass with water then pours it into the plant pot",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8609892905640054,
      "Minus Multimodal Distance": -2.61214017868042,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.542981282109395e-05,
      "MoBERT-F": 0.274705888580898,
      "MoBERT-N": 0.35221137671431624,
      "MoBERT-min(F/N)": 0.274705888580898,
      "MoBERT-max(F/N)": 0.35221137671431624,
      "MotionCritic": -10.665172576904297,
      "VeMo (human-opt view)": 1.625910103180255e-05,
      "VeMo (max entropy view)": 2.3519363725405016e-05,
      "VeMo (min entropy view)": 1.625910103180255e-05,
      "VeMo (random view)": 1.625910103180255e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person fills the glass with water, then pours it into the plant pot."
  },
  "005583": {
    "text": "a figure elegantly stretches, slowly, with their arms extended and right leg extended",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6342484797383718,
      "Minus Multimodal Distance": -10.641870498657227,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2599648218601942e-05,
      "MoBERT-F": 0.3829974973100849,
      "MoBERT-N": 0.5219436535424741,
      "MoBERT-min(F/N)": 0.3829974973100849,
      "MoBERT-max(F/N)": 0.5219436535424741,
      "MotionCritic": -7.639815330505371,
      "VeMo (human-opt view)": 0.6071428571428571,
      "VeMo (max entropy view)": 0.5153374233128835,
      "VeMo (min entropy view)": 0.6071428571428571,
      "VeMo (random view)": 0.6071428571428571,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person elegantly stretches, slowly, with their arms extended and right leg extended."
  },
  "009184": {
    "text": "a person raises both hands and places them in front of themselves",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2955254418448065,
      "Minus Multimodal Distance": -12.026008605957031,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.822047438006848e-05,
      "MoBERT-F": 0.32060010562022057,
      "MoBERT-N": 0.3750668038505342,
      "MoBERT-min(F/N)": 0.32060010562022057,
      "MoBERT-max(F/N)": 0.3750668038505342,
      "MotionCritic": -7.922575950622559,
      "VeMo (human-opt view)": 0.9961643226416144,
      "VeMo (max entropy view)": 0.9244060475161987,
      "VeMo (min entropy view)": 0.9961643226416144,
      "VeMo (random view)": 0.9961643226416144,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises both hands and places them in front of themselves."
  },
  "012837": {
    "text": "a person raised his hand, and leaned it near to face",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4185584834591811,
      "Minus Multimodal Distance": -6.474261283874512,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.0611226722830907e-05,
      "MoBERT-F": 0.2936743582995406,
      "MoBERT-N": 0.40670478895060097,
      "MoBERT-min(F/N)": 0.2936743582995406,
      "MoBERT-max(F/N)": 0.40670478895060097,
      "MotionCritic": -3.7313735485076904,
      "VeMo (human-opt view)": 0.9959485927396635,
      "VeMo (max entropy view)": 0.9883282456455378,
      "VeMo (min entropy view)": 0.9959485927396635,
      "VeMo (random view)": 0.9883282456455378,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raised his hand and leaned it close to his face."
  },
  "004473": {
    "text": "a person walks forward, spins on their foot, and walks back",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6687515715481362,
      "Minus Multimodal Distance": -7.260213851928711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.726620078086853,
      "MoBERT-F": 0.63213729126978,
      "MoBERT-N": 0.6707252119667212,
      "MoBERT-min(F/N)": 0.63213729126978,
      "MoBERT-max(F/N)": 0.6707252119667212,
      "MotionCritic": -14.013561248779297,
      "VeMo (human-opt view)": 0.578544061302682,
      "VeMo (max entropy view)": 0.4840182648401826,
      "VeMo (min entropy view)": 0.578544061302682,
      "VeMo (random view)": 0.578544061302682,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, spins on their foot, and walks back."
  },
  "010658": {
    "text": "a person walking like a bird and then sniffing the air.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1139728896469623,
      "Minus Multimodal Distance": -9.530396461486816,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.992517352104187,
      "MoBERT-F": 0.7046992703496782,
      "MoBERT-N": 0.6766767215345151,
      "MoBERT-min(F/N)": 0.6766767215345151,
      "MoBERT-max(F/N)": 0.7046992703496782,
      "MotionCritic": -9.542818069458008,
      "VeMo (human-opt view)": 0.05642519111758282,
      "VeMo (max entropy view)": 0.14078374455732948,
      "VeMo (min entropy view)": 0.05642519111758282,
      "VeMo (random view)": 0.14078374455732948,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking like a bird and then sniffing the air."
  },
  "005672": {
    "text": "the person pulls the book off of the shelf",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.19386431002221094,
      "Minus Multimodal Distance": -8.887859344482422,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7003168725059368e-05,
      "MoBERT-F": 0.3552200773952898,
      "MoBERT-N": 0.4215291898197464,
      "MoBERT-min(F/N)": 0.3552200773952898,
      "MoBERT-max(F/N)": 0.4215291898197464,
      "MotionCritic": -10.163872718811035,
      "VeMo (human-opt view)": 1.7232265815974735e-05,
      "VeMo (max entropy view)": 5.147278339811551e-05,
      "VeMo (min entropy view)": 1.7232265815974735e-05,
      "VeMo (random view)": 1.7232265815974735e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person pulls the book off the shelf."
  },
  "004063": {
    "text": "a person stands and trying to hold balance.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.48847621619234854,
      "Minus Multimodal Distance": -9.678043365478516,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.788120946206618e-05,
      "MoBERT-F": 0.34454733153050443,
      "MoBERT-N": 0.4836235646117104,
      "MoBERT-min(F/N)": 0.34454733153050443,
      "MoBERT-max(F/N)": 0.4836235646117104,
      "MotionCritic": -7.733691692352295,
      "VeMo (human-opt view)": 0.9578837452140619,
      "VeMo (max entropy view)": 0.8670309653916212,
      "VeMo (min entropy view)": 0.9578837452140619,
      "VeMo (random view)": 0.8670309653916212,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands and tries to hold their balance."
  },
  "014444": {
    "text": "figure appears to be walking back and forth",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7734514408403157,
      "Minus Multimodal Distance": -11.0426607131958,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.254675030708313,
      "MoBERT-F": 0.5560104019779983,
      "MoBERT-N": 0.5444003996793866,
      "MoBERT-min(F/N)": 0.5444003996793866,
      "MoBERT-max(F/N)": 0.5560104019779983,
      "MotionCritic": -25.88422393798828,
      "VeMo (human-opt view)": 0.9326705829191143,
      "VeMo (max entropy view)": 0.9326705829191143,
      "VeMo (min entropy view)": 0.9725388038641051,
      "VeMo (random view)": 0.9326705829191143,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure appears to be walking back and forth."
  },
  "002606": {
    "text": "a man is shadowboxing while standing still.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5801974027447508,
      "Minus Multimodal Distance": -8.674084663391113,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2484144210466184e-05,
      "MoBERT-F": 0.33283367285023224,
      "MoBERT-N": 0.4703908752957846,
      "MoBERT-min(F/N)": 0.33283367285023224,
      "MoBERT-max(F/N)": 0.4703908752957846,
      "MotionCritic": -5.411013603210449,
      "VeMo (human-opt view)": 0.11279620853080569,
      "VeMo (max entropy view)": 0.24508050089445438,
      "VeMo (min entropy view)": 0.11279620853080569,
      "VeMo (random view)": 0.24508050089445438,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is shadowboxing while standing still."
  },
  "002364": {
    "text": "a person standing forward doing leg kicks.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4968182542121066,
      "Minus Multimodal Distance": -9.604544639587402,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9381358623504639,
      "MoBERT-F": 0.8007908505631303,
      "MoBERT-N": 0.7993850174909608,
      "MoBERT-min(F/N)": 0.7993850174909608,
      "MoBERT-max(F/N)": 0.8007908505631303,
      "MotionCritic": -4.947554111480713,
      "VeMo (human-opt view)": 0.9889798682509389,
      "VeMo (max entropy view)": 0.9101796407185628,
      "VeMo (min entropy view)": 0.9889798682509389,
      "VeMo (random view)": 0.9101796407185628,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing facing forward and doing leg kicks."
  },
  "009666": {
    "text": "the person is sweeping the floor.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.134256144457705,
      "Minus Multimodal Distance": -10.264254570007324,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001755151606630534,
      "MoBERT-F": 0.5976996601386694,
      "MoBERT-N": 0.4929353501878172,
      "MoBERT-min(F/N)": 0.4929353501878172,
      "MoBERT-max(F/N)": 0.5976996601386694,
      "MotionCritic": -4.946068286895752,
      "VeMo (human-opt view)": 0.2225201072386059,
      "VeMo (max entropy view)": 0.2225201072386059,
      "VeMo (min entropy view)": 0.08993956629932456,
      "VeMo (random view)": 0.2225201072386059,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is sweeping the floor."
  },
  "011438": {
    "text": "the man is dancing around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9382044661839138,
      "Minus Multimodal Distance": -3.7563397884368896,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9657905101776123,
      "MoBERT-F": 0.7322175471586507,
      "MoBERT-N": 0.666702214281077,
      "MoBERT-min(F/N)": 0.666702214281077,
      "MoBERT-max(F/N)": 0.7322175471586507,
      "MotionCritic": -9.978110313415527,
      "VeMo (human-opt view)": 0.9498164014687882,
      "VeMo (max entropy view)": 0.8987654320987655,
      "VeMo (min entropy view)": 0.9498164014687882,
      "VeMo (random view)": 0.8987654320987655,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is dancing around."
  },
  "007772": {
    "text": "a person is using binoculars",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.20588451636245905,
      "Minus Multimodal Distance": -10.862933158874512,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.134363553021103e-05,
      "MoBERT-F": 0.3430206434019447,
      "MoBERT-N": 0.4040280993083967,
      "MoBERT-min(F/N)": 0.3430206434019447,
      "MoBERT-max(F/N)": 0.4040280993083967,
      "MotionCritic": -7.9044904708862305,
      "VeMo (human-opt view)": 0.3213367609254499,
      "VeMo (max entropy view)": 0.3213367609254499,
      "VeMo (min entropy view)": 0.2690124858115778,
      "VeMo (random view)": 0.3213367609254499,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is using binoculars."
  },
  "013633": {
    "text": "a person walks forward after appearing to lift something with their right hand and swinging their right arm slightly whilst walking.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.37230964269650485,
      "Minus Multimodal Distance": -7.094934463500977,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.558523455169052e-05,
      "MoBERT-F": 0.2550990774971934,
      "MoBERT-N": 0.37341226422914847,
      "MoBERT-min(F/N)": 0.2550990774971934,
      "MoBERT-max(F/N)": 0.37341226422914847,
      "MotionCritic": -10.020859718322754,
      "VeMo (human-opt view)": 0.4532967032967033,
      "VeMo (max entropy view)": 0.4532967032967033,
      "VeMo (min entropy view)": 0.08999543170397442,
      "VeMo (random view)": 0.4532967032967033,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward after appearing to lift something with their right hand and swinging their right arm slightly while walking."
  },
  "011863": {
    "text": "a man stands with his legs parted and slowly rotates his upper torso a few times, then starts to rotate his hips.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.37981573167737215,
      "Minus Multimodal Distance": -2.8906989097595215,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.261895108153112e-05,
      "MoBERT-F": 0.4210054361992708,
      "MoBERT-N": 0.42023454619489614,
      "MoBERT-min(F/N)": 0.42023454619489614,
      "MoBERT-max(F/N)": 0.4210054361992708,
      "MotionCritic": -5.974347114562988,
      "VeMo (human-opt view)": 0.10679611650485436,
      "VeMo (max entropy view)": 0.25699745547073793,
      "VeMo (min entropy view)": 0.10679611650485436,
      "VeMo (random view)": 0.25699745547073793,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man stands with his legs parted and slowly rotates his upper torso a few times. Then, he starts to rotate his hips."
  },
  "001384": {
    "text": "a person lifts his left hand and waves his hand to say hello then puts left hand back down",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.20689850342511226,
      "Minus Multimodal Distance": -11.139717102050781,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5890087272273377e-05,
      "MoBERT-F": 0.2520656148489129,
      "MoBERT-N": 0.3731969725315096,
      "MoBERT-min(F/N)": 0.2520656148489129,
      "MoBERT-max(F/N)": 0.3731969725315096,
      "MotionCritic": -10.54690933227539,
      "VeMo (human-opt view)": 0.9649309245483528,
      "VeMo (max entropy view)": 0.9649309245483528,
      "VeMo (min entropy view)": 0.9705775517617145,
      "VeMo (random view)": 0.9649309245483528,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lifts his left hand, waves it to say hello, then puts his left hand back down."
  },
  "012132": {
    "text": "a man does a push up and then uses his arms to balance himself back to his feet.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9732101218722943,
      "Minus Multimodal Distance": -10.837509155273438,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.008293001912534237,
      "MoBERT-F": 0.5108697758548126,
      "MoBERT-N": 0.5008141566377922,
      "MoBERT-min(F/N)": 0.5008141566377922,
      "MoBERT-max(F/N)": 0.5108697758548126,
      "MotionCritic": -3.1458258628845215,
      "VeMo (human-opt view)": 0.9796194503171247,
      "VeMo (max entropy view)": 0.9048991354466859,
      "VeMo (min entropy view)": 0.9796194503171247,
      "VeMo (random view)": 0.9048991354466859,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man does a push-up and then uses his arms to balance himself back to his feet."
  },
  "010671": {
    "text": "a person lunges forward with one foot, reaching his arms out at the same time; this happens twice.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8944187974358069,
      "Minus Multimodal Distance": -4.180399417877197,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.892654422088526e-05,
      "MoBERT-F": 0.3595740278208802,
      "MoBERT-N": 0.4912806631716199,
      "MoBERT-min(F/N)": 0.3595740278208802,
      "MoBERT-max(F/N)": 0.4912806631716199,
      "MotionCritic": -5.259863376617432,
      "VeMo (human-opt view)": 0.9434616718695803,
      "VeMo (max entropy view)": 0.8992460589444825,
      "VeMo (min entropy view)": 0.9434616718695803,
      "VeMo (random view)": 0.8992460589444825,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lunges forward with one foot while reaching their arms out simultaneously; this occurs twice."
  },
  "000580": {
    "text": "looks like he walks through a door to turn to the right and go straight forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8293787963020108,
      "Minus Multimodal Distance": -4.9605231285095215,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9522780096158385e-05,
      "MoBERT-F": 0.2940410741300173,
      "MoBERT-N": 0.47048353258155856,
      "MoBERT-min(F/N)": 0.2940410741300173,
      "MoBERT-max(F/N)": 0.47048353258155856,
      "MotionCritic": -5.816084861755371,
      "VeMo (human-opt view)": 0.651595744680851,
      "VeMo (max entropy view)": 0.651595744680851,
      "VeMo (min entropy view)": 0.7058823529411765,
      "VeMo (random view)": 0.651595744680851,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "It looks like he walks through a door, turns to the right, and goes straight forward."
  },
  "001548": {
    "text": "a person holds an object steady with their right arm and strums with their left arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.19536432568674028,
      "Minus Multimodal Distance": -3.356616973876953,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.388457844266668e-05,
      "MoBERT-F": 0.29145372670997577,
      "MoBERT-N": 0.39229148859708424,
      "MoBERT-min(F/N)": 0.29145372670997577,
      "MoBERT-max(F/N)": 0.39229148859708424,
      "MotionCritic": -8.229999542236328,
      "VeMo (human-opt view)": 0.00016836984900386073,
      "VeMo (max entropy view)": 0.024463680843056078,
      "VeMo (min entropy view)": 0.00016836984900386073,
      "VeMo (random view)": 0.024463680843056078,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person holds an object steady with their right arm and strums with their left arm."
  },
  "010384": {
    "text": "a person walks forward and picks things up and puts them down with their hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7157690823975452,
      "Minus Multimodal Distance": -11.315942764282227,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0970473744673654e-05,
      "MoBERT-F": 0.3483776802741091,
      "MoBERT-N": 0.4890449234847882,
      "MoBERT-min(F/N)": 0.3483776802741091,
      "MoBERT-max(F/N)": 0.4890449234847882,
      "MotionCritic": -6.143372058868408,
      "VeMo (human-opt view)": 0.6085626911314985,
      "VeMo (max entropy view)": 0.45410628019323673,
      "VeMo (min entropy view)": 0.6085626911314985,
      "VeMo (random view)": 0.45410628019323673,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, picks things up, and puts them down with their hands."
  },
  "012414": {
    "text": "a person runs diagonally across a room with their arms swinging hands down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5673077978286478,
      "Minus Multimodal Distance": -6.775500774383545,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0007700179703533649,
      "MoBERT-F": 0.5874550183703218,
      "MoBERT-N": 0.5245282970766549,
      "MoBERT-min(F/N)": 0.5245282970766549,
      "MoBERT-max(F/N)": 0.5874550183703218,
      "MotionCritic": -17.655067443847656,
      "VeMo (human-opt view)": 0.8171673819742489,
      "VeMo (max entropy view)": 0.8171673819742489,
      "VeMo (min entropy view)": 0.8175182481751825,
      "VeMo (random view)": 0.8171673819742489,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs diagonally across a room with their arms swinging and their hands down."
  },
  "006567": {
    "text": "a person sits on a chair behind them and then stands back up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5390033389778017,
      "Minus Multimodal Distance": -9.826543807983398,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.6715102195739746,
      "MoBERT-F": 0.5517648291108477,
      "MoBERT-N": 0.41307394687737725,
      "MoBERT-min(F/N)": 0.41307394687737725,
      "MoBERT-max(F/N)": 0.5517648291108477,
      "MotionCritic": -8.690190315246582,
      "VeMo (human-opt view)": 0.5777777777777777,
      "VeMo (max entropy view)": 0.42355889724310775,
      "VeMo (min entropy view)": 0.5777777777777777,
      "VeMo (random view)": 0.5777777777777777,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits on a chair behind them and then stands back up."
  },
  "013449": {
    "text": "a person is boxing, jabbing mostly with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9873057514857443,
      "Minus Multimodal Distance": -10.321089744567871,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4805136490613222e-05,
      "MoBERT-F": 0.3193361997063431,
      "MoBERT-N": 0.4366168164862678,
      "MoBERT-min(F/N)": 0.3193361997063431,
      "MoBERT-max(F/N)": 0.4366168164862678,
      "MotionCritic": -6.93419885635376,
      "VeMo (human-opt view)": 0.7063197026022305,
      "VeMo (max entropy view)": 0.7063197026022305,
      "VeMo (min entropy view)": 0.8674275680421423,
      "VeMo (random view)": 0.7063197026022305,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is boxing, jabbing mostly with their right hand."
  },
  "002754": {
    "text": "a man does a dance, shuffles backward, and dances again.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0828647162628156,
      "Minus Multimodal Distance": -5.519732475280762,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9581912755966187,
      "MoBERT-F": 0.6681386638595995,
      "MoBERT-N": 0.6772723617054235,
      "MoBERT-min(F/N)": 0.6681386638595995,
      "MoBERT-max(F/N)": 0.6772723617054235,
      "MotionCritic": -7.097957134246826,
      "VeMo (human-opt view)": 0.9325626204238922,
      "VeMo (max entropy view)": 0.9147788565264293,
      "VeMo (min entropy view)": 0.9325626204238922,
      "VeMo (random view)": 0.9147788565264293,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man does a dance, shuffles backward, and dances again."
  },
  "013419": {
    "text": "this person jumps up and down on his left leg.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5091888390072258,
      "Minus Multimodal Distance": -9.616719245910645,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.007434812840074301,
      "MoBERT-F": 0.6099387283499579,
      "MoBERT-N": 0.7491568817413916,
      "MoBERT-min(F/N)": 0.6099387283499579,
      "MoBERT-max(F/N)": 0.7491568817413916,
      "MotionCritic": -8.534149169921875,
      "VeMo (human-opt view)": 0.06379234491860977,
      "VeMo (max entropy view)": 0.06379234491860977,
      "VeMo (min entropy view)": 0.0018119681644074405,
      "VeMo (random view)": 0.06379234491860977,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person jumps up and down on his left leg."
  },
  "013175": {
    "text": "the person left of the dumbbell over his head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5600328587717331,
      "Minus Multimodal Distance": -6.282885551452637,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8988353733439e-05,
      "MoBERT-F": 0.3559133326091134,
      "MoBERT-N": 0.44916904335525304,
      "MoBERT-min(F/N)": 0.3559133326091134,
      "MoBERT-max(F/N)": 0.44916904335525304,
      "MotionCritic": -2.221757411956787,
      "VeMo (human-opt view)": 0.36398467432950193,
      "VeMo (max entropy view)": 0.39263803680981596,
      "VeMo (min entropy view)": 0.36398467432950193,
      "VeMo (random view)": 0.36398467432950193,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person lifted the dumbbell over his head."
  },
  "007661": {
    "text": "a person moves their arms in a strange manner.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6188253122922461,
      "Minus Multimodal Distance": -8.377487182617188,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.350276165292598e-05,
      "MoBERT-F": 0.3251907585858867,
      "MoBERT-N": 0.38308595997825634,
      "MoBERT-min(F/N)": 0.3251907585858867,
      "MoBERT-max(F/N)": 0.38308595997825634,
      "MotionCritic": -12.212305068969727,
      "VeMo (human-opt view)": 0.9625537139349294,
      "VeMo (max entropy view)": 0.9625537139349294,
      "VeMo (min entropy view)": 0.9626658438753471,
      "VeMo (random view)": 0.9626658438753471,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves his or her arms in a strange manner."
  },
  "008157": {
    "text": "a person turns to their left while leaping forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9835603140781275,
      "Minus Multimodal Distance": -6.144869804382324,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.380949783604592e-05,
      "MoBERT-F": 0.45691809083007784,
      "MoBERT-N": 0.5539068035491255,
      "MoBERT-min(F/N)": 0.45691809083007784,
      "MoBERT-max(F/N)": 0.5539068035491255,
      "MotionCritic": -4.7421441078186035,
      "VeMo (human-opt view)": 0.8806431663574521,
      "VeMo (max entropy view)": 0.7304015296367112,
      "VeMo (min entropy view)": 0.8806431663574521,
      "VeMo (random view)": 0.7304015296367112,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person turns to their left while leaping forward."
  },
  "010305": {
    "text": "both arms raise to the right, hit throw",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8901683077493243,
      "Minus Multimodal Distance": -3.0091304779052734,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.959406891837716e-05,
      "MoBERT-F": 0.3997015967861168,
      "MoBERT-N": 0.511115257874906,
      "MoBERT-min(F/N)": 0.3997015967861168,
      "MoBERT-max(F/N)": 0.511115257874906,
      "MotionCritic": -2.561936140060425,
      "VeMo (human-opt view)": 0.6076388888888888,
      "VeMo (max entropy view)": 0.6076388888888888,
      "VeMo (min entropy view)": 0.7053571428571429,
      "VeMo (random view)": 0.6076388888888888,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person raises both arms to the right, then hits and throws something."
  },
  "012925": {
    "text": "a person slowly paces back and forth.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8728204738479685,
      "Minus Multimodal Distance": -3.465641975402832,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8738183379173279,
      "MoBERT-F": 0.6739058745051927,
      "MoBERT-N": 0.6539218720245176,
      "MoBERT-min(F/N)": 0.6539218720245176,
      "MoBERT-max(F/N)": 0.6739058745051927,
      "MotionCritic": -10.657882690429688,
      "VeMo (human-opt view)": 0.9924096817278562,
      "VeMo (max entropy view)": 0.9883306547411083,
      "VeMo (min entropy view)": 0.9924096817278562,
      "VeMo (random view)": 0.9883306547411083,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly paces back and forth."
  },
  "003723": {
    "text": "person prepares food on a kitchen worktop.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.34711018003043026,
      "Minus Multimodal Distance": -7.230791091918945,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.5286455386085436e-05,
      "MoBERT-F": 0.4021755541090195,
      "MoBERT-N": 0.4528297206687744,
      "MoBERT-min(F/N)": 0.4021755541090195,
      "MoBERT-max(F/N)": 0.4528297206687744,
      "MotionCritic": -14.21576976776123,
      "VeMo (human-opt view)": 5.580729669808006e-06,
      "VeMo (max entropy view)": 5.972154314589138e-06,
      "VeMo (min entropy view)": 5.580729669808006e-06,
      "VeMo (random view)": 5.580729669808006e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person prepares food on a kitchen worktop."
  },
  "009449": {
    "text": "the person brings their hands up in front of them as if opening something, and then ends in a t-pose.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1974546743130925,
      "Minus Multimodal Distance": -9.03443431854248,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.266243584221229e-05,
      "MoBERT-F": 0.2916361666251838,
      "MoBERT-N": 0.3627228693752727,
      "MoBERT-min(F/N)": 0.2916361666251838,
      "MoBERT-max(F/N)": 0.3627228693752727,
      "MotionCritic": -13.117531776428223,
      "VeMo (human-opt view)": 0.9867448025673224,
      "VeMo (max entropy view)": 0.9740340414958379,
      "VeMo (min entropy view)": 0.9867448025673224,
      "VeMo (random view)": 0.9867448025673224,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person brings their hands up in front of them as if opening something, and then ends in a T - pose."
  },
  "001772": {
    "text": "a person walks up to shake with their right hand, turns slightly right to shake again, and turns right again to shake for a final time.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5216031909069495,
      "Minus Multimodal Distance": -7.817182540893555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9395516321528703e-05,
      "MoBERT-F": 0.3871964050139399,
      "MoBERT-N": 0.49695057013801636,
      "MoBERT-min(F/N)": 0.3871964050139399,
      "MoBERT-max(F/N)": 0.49695057013801636,
      "MotionCritic": -10.669498443603516,
      "VeMo (human-opt view)": 0.08501715592832634,
      "VeMo (max entropy view)": 0.2576312576312576,
      "VeMo (min entropy view)": 0.08501715592832634,
      "VeMo (random view)": 0.08501715592832634,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks up and shakes with their right hand, then turns slightly to the right to shake again, and turns right once more to shake for a final time."
  },
  "010563": {
    "text": "a person lifts their left arm up, twitches a bit, then brings their right arm up while putting their left arm down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.37126022359176164,
      "Minus Multimodal Distance": -6.217512130737305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.776065048237797e-05,
      "MoBERT-F": 0.37686919178506517,
      "MoBERT-N": 0.40366057498063895,
      "MoBERT-min(F/N)": 0.37686919178506517,
      "MoBERT-max(F/N)": 0.40366057498063895,
      "MotionCritic": -6.00542688369751,
      "VeMo (human-opt view)": 0.6370023419203747,
      "VeMo (max entropy view)": 0.6370023419203747,
      "VeMo (min entropy view)": 0.6373937677053825,
      "VeMo (random view)": 0.6373937677053825,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts their left arm up, twitches a bit, then brings their right arm up while putting their left arm down."
  },
  "008320": {
    "text": "a person while standing still raises his left hand up and down and turns to his left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5902994846068379,
      "Minus Multimodal Distance": -6.721682548522949,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.2382591598434374e-05,
      "MoBERT-F": 0.3075528271951433,
      "MoBERT-N": 0.4297928197396264,
      "MoBERT-min(F/N)": 0.3075528271951433,
      "MoBERT-max(F/N)": 0.4297928197396264,
      "MotionCritic": -13.55900764465332,
      "VeMo (human-opt view)": 0.9241179313678105,
      "VeMo (max entropy view)": 0.9241179313678105,
      "VeMo (min entropy view)": 0.9602053915275995,
      "VeMo (random view)": 0.9241179313678105,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person, while standing still, raises his left hand up and down and turns to his left."
  },
  "009926": {
    "text": "a person walks forward before bending down to hold his left knee with both hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6383715710330911,
      "Minus Multimodal Distance": -3.6682677268981934,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.626928653626237e-05,
      "MoBERT-F": 0.3958820407276845,
      "MoBERT-N": 0.5236652544046294,
      "MoBERT-min(F/N)": 0.3958820407276845,
      "MoBERT-max(F/N)": 0.5236652544046294,
      "MotionCritic": -10.179414749145508,
      "VeMo (human-opt view)": 0.8174807197943444,
      "VeMo (max entropy view)": 0.8174807197943444,
      "VeMo (min entropy view)": 0.8270676691729323,
      "VeMo (random view)": 0.8270676691729323,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward before bending down to hold his left knee with both hands."
  },
  "000268": {
    "text": "someone slouched in a sitting position gets up and begins walking in a circle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1142072881600225,
      "Minus Multimodal Distance": -4.409824371337891,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9632865190505981,
      "MoBERT-F": 0.6652616238536526,
      "MoBERT-N": 0.6286540009308902,
      "MoBERT-min(F/N)": 0.6286540009308902,
      "MoBERT-max(F/N)": 0.6652616238536526,
      "MotionCritic": -8.778297424316406,
      "VeMo (human-opt view)": 0.9045412418906394,
      "VeMo (max entropy view)": 0.8597640891218873,
      "VeMo (min entropy view)": 0.9045412418906394,
      "VeMo (random view)": 0.9045412418906394,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone who was slouched in a sitting position gets up and begins walking in a circle."
  },
  "004458": {
    "text": "person moves forward and extends the right leg and kicks something to the front",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6525639749969279,
      "Minus Multimodal Distance": -6.262730121612549,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9424242973327637,
      "MoBERT-F": 0.8055084644642211,
      "MoBERT-N": 0.7135456130719074,
      "MoBERT-min(F/N)": 0.7135456130719074,
      "MoBERT-max(F/N)": 0.8055084644642211,
      "MotionCritic": -4.162920951843262,
      "VeMo (human-opt view)": 0.880648899188876,
      "VeMo (max entropy view)": 0.8352835283528353,
      "VeMo (min entropy view)": 0.880648899188876,
      "VeMo (random view)": 0.880648899188876,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person moves forward, extends the right leg, and kicks something forward."
  },
  "001752": {
    "text": "a person stands still then they throw a football",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5839436955978301,
      "Minus Multimodal Distance": -8.987814903259277,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5325758457183838,
      "MoBERT-F": 0.6901091147105425,
      "MoBERT-N": 0.7372575993785637,
      "MoBERT-min(F/N)": 0.6901091147105425,
      "MoBERT-max(F/N)": 0.7372575993785637,
      "MotionCritic": -5.235741138458252,
      "VeMo (human-opt view)": 0.1827027027027027,
      "VeMo (max entropy view)": 0.3632,
      "VeMo (min entropy view)": 0.1827027027027027,
      "VeMo (random view)": 0.1827027027027027,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still and then throws a football."
  },
  "009958": {
    "text": "a person walks backward in a counterclockwise circle",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7077547772815524,
      "Minus Multimodal Distance": -2.157031297683716,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9908576607704163,
      "MoBERT-F": 0.5658310232626798,
      "MoBERT-N": 0.6561155279524036,
      "MoBERT-min(F/N)": 0.5658310232626798,
      "MoBERT-max(F/N)": 0.6561155279524036,
      "MotionCritic": -9.024923324584961,
      "VeMo (human-opt view)": 0.8441432720232332,
      "VeMo (max entropy view)": 0.6521739130434783,
      "VeMo (min entropy view)": 0.8441432720232332,
      "VeMo (random view)": 0.6521739130434783,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks backward in a counter - clockwise circle."
  },
  "008057": {
    "text": "a person dancing and spinning side to side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1314628112342162,
      "Minus Multimodal Distance": -9.689876556396484,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9953195452690125,
      "MoBERT-F": 0.7458488786293498,
      "MoBERT-N": 0.6833633363063906,
      "MoBERT-min(F/N)": 0.6833633363063906,
      "MoBERT-max(F/N)": 0.7458488786293498,
      "MotionCritic": -1.5716838836669922,
      "VeMo (human-opt view)": 0.9323173464813985,
      "VeMo (max entropy view)": 0.9284099472494348,
      "VeMo (min entropy view)": 0.9323173464813985,
      "VeMo (random view)": 0.9284099472494348,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is dancing and spinning from side to side."
  },
  "004307": {
    "text": "a person vaults over an obstacle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2320196583266168,
      "Minus Multimodal Distance": -11.174467086791992,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9861261282349005e-05,
      "MoBERT-F": 0.36501528081312223,
      "MoBERT-N": 0.47795968851375575,
      "MoBERT-min(F/N)": 0.36501528081312223,
      "MoBERT-max(F/N)": 0.47795968851375575,
      "MotionCritic": -6.125921249389648,
      "VeMo (human-opt view)": 9.227358240842813e-06,
      "VeMo (max entropy view)": 0.0003572588980253727,
      "VeMo (min entropy view)": 9.227358240842813e-06,
      "VeMo (random view)": 0.0003572588980253727,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person vaults over an obstacle."
  },
  "007199": {
    "text": "it is a person walking backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.32840710790593525,
      "Minus Multimodal Distance": -11.351178169250488,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9929954409599304,
      "MoBERT-F": 0.7331761135682123,
      "MoBERT-N": 0.6484290328128807,
      "MoBERT-min(F/N)": 0.6484290328128807,
      "MoBERT-max(F/N)": 0.7331761135682123,
      "MotionCritic": -7.145756244659424,
      "VeMo (human-opt view)": 0.7182320441988951,
      "VeMo (max entropy view)": 0.7182320441988951,
      "VeMo (min entropy view)": 0.9497716894977168,
      "VeMo (random view)": 0.7182320441988951,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "It is a person walking backwards."
  },
  "009776": {
    "text": "a figure does a standing sprint",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.807614678782631,
      "Minus Multimodal Distance": -9.637683868408203,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001527979038655758,
      "MoBERT-F": 0.5797292761860063,
      "MoBERT-N": 0.6326360018347085,
      "MoBERT-min(F/N)": 0.5797292761860063,
      "MoBERT-max(F/N)": 0.6326360018347085,
      "MotionCritic": -10.624534606933594,
      "VeMo (human-opt view)": 0.9688206055128784,
      "VeMo (max entropy view)": 0.8675445391169636,
      "VeMo (min entropy view)": 0.9688206055128784,
      "VeMo (random view)": 0.8675445391169636,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure does a standing sprint."
  },
  "014428": {
    "text": "person is kicking an athletic ball",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7407618268190808,
      "Minus Multimodal Distance": -9.002820014953613,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9879820942878723,
      "MoBERT-F": 0.7603323240628288,
      "MoBERT-N": 0.6672414562941748,
      "MoBERT-min(F/N)": 0.6672414562941748,
      "MoBERT-max(F/N)": 0.7603323240628288,
      "MotionCritic": -4.416528224945068,
      "VeMo (human-opt view)": 0.4221105527638191,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.4221105527638191,
      "VeMo (random view)": 0.5,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is kicking an athletic ball."
  },
  "012578": {
    "text": "person person is planting vegetables.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.38468976797577203,
      "Minus Multimodal Distance": -10.228437423706055,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4242121071438305e-05,
      "MoBERT-F": 0.40737434604642064,
      "MoBERT-N": 0.45994717138050567,
      "MoBERT-min(F/N)": 0.40737434604642064,
      "MoBERT-max(F/N)": 0.45994717138050567,
      "MotionCritic": -3.7404210567474365,
      "VeMo (human-opt view)": 5.42085620215216e-06,
      "VeMo (max entropy view)": 8.915168061204686e-06,
      "VeMo (min entropy view)": 5.42085620215216e-06,
      "VeMo (random view)": 5.42085620215216e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is planting vegetables."
  },
  "007550": {
    "text": "a man climbs up steps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5741042290743188,
      "Minus Multimodal Distance": -9.992829322814941,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8074509501457214,
      "MoBERT-F": 0.8510407810254198,
      "MoBERT-N": 0.7321432236004808,
      "MoBERT-min(F/N)": 0.7321432236004808,
      "MoBERT-max(F/N)": 0.8510407810254198,
      "MotionCritic": -6.2358174324035645,
      "VeMo (human-opt view)": 0.0002957325437630719,
      "VeMo (max entropy view)": 0.0031815375848805736,
      "VeMo (min entropy view)": 0.0002957325437630719,
      "VeMo (random view)": 0.0002957325437630719,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man climbs up the steps."
  },
  "008312": {
    "text": "the person walks across the area, picks something up and then starts to move its arm as if to make a scrubbing motion.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.093050243517554,
      "Minus Multimodal Distance": -5.34589147567749,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.2799641960300505e-05,
      "MoBERT-F": 0.26693778010041636,
      "MoBERT-N": 0.39351241588374514,
      "MoBERT-min(F/N)": 0.26693778010041636,
      "MoBERT-max(F/N)": 0.39351241588374514,
      "MotionCritic": -4.252010822296143,
      "VeMo (human-opt view)": 0.8267716535433071,
      "VeMo (max entropy view)": 0.8267716535433071,
      "VeMo (min entropy view)": 0.8929384965831435,
      "VeMo (random view)": 0.8929384965831435,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walks across the area, picks something up, and then starts to move their arm as if to make a scrubbing motion."
  },
  "007513": {
    "text": "a person swings their right arm over their head as if they were spiking a volleyball.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7851440135803145,
      "Minus Multimodal Distance": -5.937469959259033,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.501582952798344e-05,
      "MoBERT-F": 0.48514205244699815,
      "MoBERT-N": 0.4897141268553489,
      "MoBERT-min(F/N)": 0.48514205244699815,
      "MoBERT-max(F/N)": 0.4897141268553489,
      "MotionCritic": -4.924071311950684,
      "VeMo (human-opt view)": 0.8869690424766019,
      "VeMo (max entropy view)": 0.8869690424766019,
      "VeMo (min entropy view)": 0.9193267556587348,
      "VeMo (random view)": 0.9193267556587348,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person swings their right arm over their head as if they were spiking a volleyball."
  },
  "002315": {
    "text": "a stick fogure holds their arms straight up and then quickly puts them back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.36537519614266106,
      "Minus Multimodal Distance": -6.407447814941406,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9604201699839905e-05,
      "MoBERT-F": 0.4351714302356291,
      "MoBERT-N": 0.4827458611861013,
      "MoBERT-min(F/N)": 0.4351714302356291,
      "MoBERT-max(F/N)": 0.4827458611861013,
      "MotionCritic": -11.127992630004883,
      "VeMo (human-opt view)": 0.9433224755700326,
      "VeMo (max entropy view)": 0.9433224755700326,
      "VeMo (min entropy view)": 0.9739383998542008,
      "VeMo (random view)": 0.9433224755700326,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A stick figure holds its arms straight up and then quickly puts them back down."
  },
  "010586": {
    "text": "a person carefully sits down on the ground and crosses their legs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7981039023731766,
      "Minus Multimodal Distance": -10.502724647521973,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5143097639083862,
      "MoBERT-F": 0.47563359225650187,
      "MoBERT-N": 0.46824059889844594,
      "MoBERT-min(F/N)": 0.46824059889844594,
      "MoBERT-max(F/N)": 0.47563359225650187,
      "MotionCritic": -6.30200719833374,
      "VeMo (human-opt view)": 0.8434864104967198,
      "VeMo (max entropy view)": 0.8175473579262214,
      "VeMo (min entropy view)": 0.8434864104967198,
      "VeMo (random view)": 0.8175473579262214,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person carefully sits down on the ground and crosses their legs."
  },
  "013728": {
    "text": "a person with its hand raised, gets down on to its knees.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5316445834115081,
      "Minus Multimodal Distance": -7.186729907989502,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.007975274696946144,
      "MoBERT-F": 0.5558899040315155,
      "MoBERT-N": 0.5637749993565635,
      "MoBERT-min(F/N)": 0.5558899040315155,
      "MoBERT-max(F/N)": 0.5637749993565635,
      "MotionCritic": -7.711930751800537,
      "VeMo (human-opt view)": 0.7428571428571429,
      "VeMo (max entropy view)": 0.7428571428571429,
      "VeMo (min entropy view)": 0.7545454545454545,
      "VeMo (random view)": 0.7428571428571429,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person with their hand raised gets down on their knees."
  },
  "006106": {
    "text": "a person stands still for a moment, and then staggers forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.18249084251660733,
      "Minus Multimodal Distance": -7.196072101593018,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.942493877839297e-05,
      "MoBERT-F": 0.30527036242574773,
      "MoBERT-N": 0.3763891724445324,
      "MoBERT-min(F/N)": 0.30527036242574773,
      "MoBERT-max(F/N)": 0.3763891724445324,
      "MotionCritic": -1.5430384874343872,
      "VeMo (human-opt view)": 0.9723435225618632,
      "VeMo (max entropy view)": 0.9241179313678105,
      "VeMo (min entropy view)": 0.9723435225618632,
      "VeMo (random view)": 0.9241179313678105,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still for a moment and then staggers forward."
  },
  "010964": {
    "text": "a person lowers and walks on all fours to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0343180911401517,
      "Minus Multimodal Distance": -7.406316757202148,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9955911636352539,
      "MoBERT-F": 0.6561259591481519,
      "MoBERT-N": 0.6034377501311264,
      "MoBERT-min(F/N)": 0.6034377501311264,
      "MoBERT-max(F/N)": 0.6561259591481519,
      "MotionCritic": -5.460168838500977,
      "VeMo (human-opt view)": 0.9497716894977168,
      "VeMo (max entropy view)": 0.9497716894977168,
      "VeMo (min entropy view)": 0.9668916747026679,
      "VeMo (random view)": 0.9668916747026679,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lowers their body down and walks on all fours to the right."
  },
  "001359": {
    "text": "a person puts their left hand up by their head",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.36445432002678874,
      "Minus Multimodal Distance": -11.697049140930176,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.8528996305540204e-05,
      "MoBERT-F": 0.2979718165336842,
      "MoBERT-N": 0.3708721396951137,
      "MoBERT-min(F/N)": 0.2979718165336842,
      "MoBERT-max(F/N)": 0.3708721396951137,
      "MotionCritic": -11.839815139770508,
      "VeMo (human-opt view)": 0.9859269126078647,
      "VeMo (max entropy view)": 0.9859269126078647,
      "VeMo (min entropy view)": 0.9876103568513984,
      "VeMo (random view)": 0.9859269126078647,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person puts their left hand up near their head."
  },
  "006058": {
    "text": "the sim walks down the plane in a wobbly fashion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5465635375435911,
      "Minus Multimodal Distance": -11.384705543518066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7016771127819084e-05,
      "MoBERT-F": 0.43534719976382735,
      "MoBERT-N": 0.5066249508532941,
      "MoBERT-min(F/N)": 0.43534719976382735,
      "MoBERT-max(F/N)": 0.5066249508532941,
      "MotionCritic": -2.6631782054901123,
      "VeMo (human-opt view)": 0.8076923076923077,
      "VeMo (max entropy view)": 0.7978723404255319,
      "VeMo (min entropy view)": 0.8076923076923077,
      "VeMo (random view)": 0.7978723404255319,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person walks down the plane in a wobbly fashion."
  },
  "005472": {
    "text": "the person is walking and making a right turn.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1374527141514162,
      "Minus Multimodal Distance": -11.516735076904297,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.301144559169188e-05,
      "MoBERT-F": 0.3930040332461725,
      "MoBERT-N": 0.5028466758539945,
      "MoBERT-min(F/N)": 0.3930040332461725,
      "MoBERT-max(F/N)": 0.5028466758539945,
      "MotionCritic": -2.0393259525299072,
      "VeMo (human-opt view)": 0.9796506392940753,
      "VeMo (max entropy view)": 0.9796506392940753,
      "VeMo (min entropy view)": 0.985064892715059,
      "VeMo (random view)": 0.9796506392940753,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking and making a right turn."
  },
  "002024": {
    "text": "a person doing air kicks with his right feet.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7360254745754632,
      "Minus Multimodal Distance": -5.582269668579102,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9916372895240784,
      "MoBERT-F": 0.8279725387883576,
      "MoBERT-N": 0.8412424932091658,
      "MoBERT-min(F/N)": 0.8279725387883576,
      "MoBERT-max(F/N)": 0.8412424932091658,
      "MotionCritic": -7.086212158203125,
      "VeMo (human-opt view)": 0.9580386610089581,
      "VeMo (max entropy view)": 0.899165061014772,
      "VeMo (min entropy view)": 0.9580386610089581,
      "VeMo (random view)": 0.899165061014772,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is doing air kicks with his right foot."
  },
  "011091": {
    "text": "the person walked forward and then turn right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2985382440264269,
      "Minus Multimodal Distance": -5.464646339416504,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4740240405662917e-05,
      "MoBERT-F": 0.38114218569419855,
      "MoBERT-N": 0.6292612803862804,
      "MoBERT-min(F/N)": 0.38114218569419855,
      "MoBERT-max(F/N)": 0.6292612803862804,
      "MotionCritic": -3.65193510055542,
      "VeMo (human-opt view)": 0.97833890332962,
      "VeMo (max entropy view)": 0.97833890332962,
      "VeMo (min entropy view)": 0.98414517669532,
      "VeMo (random view)": 0.98414517669532,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walked forward and then turned right."
  },
  "001486": {
    "text": "the person is picking up something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6798621820476703,
      "Minus Multimodal Distance": -4.9312944412231445,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7143310944666155e-05,
      "MoBERT-F": 0.4222394180713546,
      "MoBERT-N": 0.4776051895379213,
      "MoBERT-min(F/N)": 0.4222394180713546,
      "MoBERT-max(F/N)": 0.4776051895379213,
      "MotionCritic": -5.877636432647705,
      "VeMo (human-opt view)": 0.9669064748201439,
      "VeMo (max entropy view)": 0.9603751465416178,
      "VeMo (min entropy view)": 0.9669064748201439,
      "VeMo (random view)": 0.9603751465416178,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is picking up something."
  },
  "002932": {
    "text": "the person does 2 cartwheels",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.9733711348250351,
      "Minus Multimodal Distance": -6.608089447021484,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9678586721420288,
      "MoBERT-F": 0.7169762642468389,
      "MoBERT-N": 0.7022566008296296,
      "MoBERT-min(F/N)": 0.7022566008296296,
      "MoBERT-max(F/N)": 0.7169762642468389,
      "MotionCritic": -10.245562553405762,
      "VeMo (human-opt view)": 0.9049773755656109,
      "VeMo (max entropy view)": 0.8806431663574521,
      "VeMo (min entropy view)": 0.9049773755656109,
      "VeMo (random view)": 0.9049773755656109,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person does 2 cartwheels."
  },
  "008343": {
    "text": "a standing man leans down to a kneeled position with his left knee contacting the ground and his right leg planted foot down. the man then stands up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3950211118668655,
      "Minus Multimodal Distance": -10.901961326599121,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9473499655723572,
      "MoBERT-F": 0.536966785673655,
      "MoBERT-N": 0.454029762087678,
      "MoBERT-min(F/N)": 0.454029762087678,
      "MoBERT-max(F/N)": 0.536966785673655,
      "MotionCritic": 0.6074677109718323,
      "VeMo (human-opt view)": 0.9497716894977168,
      "VeMo (max entropy view)": 0.9466498103666245,
      "VeMo (min entropy view)": 0.9497716894977168,
      "VeMo (random view)": 0.9466498103666245,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A standing man leans down into a kneeling position with his left knee touching the ground and his right leg planted with the foot down. The man then stands up."
  },
  "008208": {
    "text": "a person quickly runs straight forward, then bends down and picks up something with both hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9468962685977568,
      "Minus Multimodal Distance": -3.515775203704834,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9953864216804504,
      "MoBERT-F": 0.6642759781440513,
      "MoBERT-N": 0.6656551947521265,
      "MoBERT-min(F/N)": 0.6642759781440513,
      "MoBERT-max(F/N)": 0.6656551947521265,
      "MotionCritic": -7.556058406829834,
      "VeMo (human-opt view)": 0.6788511749347258,
      "VeMo (max entropy view)": 0.6788511749347258,
      "VeMo (min entropy view)": 0.7983539094650206,
      "VeMo (random view)": 0.7983539094650206,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person quickly runs straight forward, then bends down and picks up something with both hands."
  },
  "009244": {
    "text": "person standing with both feet firmly planted on the ground upper body moved very slightly",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1850906551875961,
      "Minus Multimodal Distance": -6.6637678146362305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.255089490674436e-05,
      "MoBERT-F": 0.31798950844696927,
      "MoBERT-N": 0.4266081587993908,
      "MoBERT-min(F/N)": 0.31798950844696927,
      "MoBERT-max(F/N)": 0.4266081587993908,
      "MotionCritic": -7.313726902008057,
      "VeMo (human-opt view)": 0.8933500627352572,
      "VeMo (max entropy view)": 0.8804554079696395,
      "VeMo (min entropy view)": 0.8933500627352572,
      "VeMo (random view)": 0.8933500627352572,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing with both feet firmly planted on the ground, and their upper body moves very slightly."
  },
  "001169": {
    "text": "a person doing a spesific moves with legs and hands while doing boxing",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3084539712310435,
      "Minus Multimodal Distance": -10.656379699707031,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.372052303049713e-05,
      "MoBERT-F": 0.4558697807460114,
      "MoBERT-N": 0.42756560165702917,
      "MoBERT-min(F/N)": 0.42756560165702917,
      "MoBERT-max(F/N)": 0.4558697807460114,
      "MotionCritic": -9.034042358398438,
      "VeMo (human-opt view)": 0.7049504950495049,
      "VeMo (max entropy view)": 0.6933333333333334,
      "VeMo (min entropy view)": 0.7049504950495049,
      "VeMo (random view)": 0.7049504950495049,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person doing specific movements with legs and hands while doing boxing."
  },
  "008614": {
    "text": "a man slowly leans forward and moves around and carries a heavy object.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6234362278713739,
      "Minus Multimodal Distance": -6.198062896728516,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.4988946318626404,
      "MoBERT-F": 0.6179172457120305,
      "MoBERT-N": 0.5101242780448872,
      "MoBERT-min(F/N)": 0.5101242780448872,
      "MoBERT-max(F/N)": 0.6179172457120305,
      "MotionCritic": -2.1340737342834473,
      "VeMo (human-opt view)": 0.5615942028985508,
      "VeMo (max entropy view)": 0.5615942028985508,
      "VeMo (min entropy view)": 0.4228456913827655,
      "VeMo (random view)": 0.4228456913827655,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man slowly leans forward, moves around, and carries a heavy object."
  },
  "001888": {
    "text": "the person is kneeling down on all fours to begin to crawl",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7007602801338997,
      "Minus Multimodal Distance": -10.772405624389648,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9976116418838501,
      "MoBERT-F": 0.7205352670682598,
      "MoBERT-N": 0.6496091085017137,
      "MoBERT-min(F/N)": 0.6496091085017137,
      "MoBERT-max(F/N)": 0.7205352670682598,
      "MotionCritic": -5.572525501251221,
      "VeMo (human-opt view)": 0.9956531686113017,
      "VeMo (max entropy view)": 0.9924491967769726,
      "VeMo (min entropy view)": 0.9956531686113017,
      "VeMo (random view)": 0.9924491967769726,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is kneeling down on all fours to begin crawling."
  },
  "009377": {
    "text": "the person is walking forward and turn around like a monster",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6937366031034127,
      "Minus Multimodal Distance": -8.700057029724121,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.013142332434654236,
      "MoBERT-F": 0.5392418324067473,
      "MoBERT-N": 0.49591840505297813,
      "MoBERT-min(F/N)": 0.49591840505297813,
      "MoBERT-max(F/N)": 0.5392418324067473,
      "MotionCritic": -17.66295623779297,
      "VeMo (human-opt view)": 0.899165061014772,
      "VeMo (max entropy view)": 0.6798029556650246,
      "VeMo (min entropy view)": 0.899165061014772,
      "VeMo (random view)": 0.899165061014772,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking forward and turning around like a monster."
  },
  "007551": {
    "text": "a figure seems to jog on a treadmill",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3624880107460518,
      "Minus Multimodal Distance": -6.229292869567871,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6614516173140146e-05,
      "MoBERT-F": 0.434096745440966,
      "MoBERT-N": 0.5842953442017654,
      "MoBERT-min(F/N)": 0.434096745440966,
      "MoBERT-max(F/N)": 0.5842953442017654,
      "MotionCritic": -6.074826717376709,
      "VeMo (human-opt view)": 0.9151157512482978,
      "VeMo (max entropy view)": 0.8266953713670614,
      "VeMo (min entropy view)": 0.9151157512482978,
      "VeMo (random view)": 0.9151157512482978,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure seems to be jogging on a treadmill."
  },
  "004952": {
    "text": "a person does a jumping jack in place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9590277503040641,
      "Minus Multimodal Distance": -3.248321771621704,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9878079295158386,
      "MoBERT-F": 0.9066802902708688,
      "MoBERT-N": 0.8790253753614623,
      "MoBERT-min(F/N)": 0.8790253753614623,
      "MoBERT-max(F/N)": 0.9066802902708688,
      "MotionCritic": -9.645155906677246,
      "VeMo (human-opt view)": 0.9577098243331165,
      "VeMo (max entropy view)": 0.9433290978398984,
      "VeMo (min entropy view)": 0.9577098243331165,
      "VeMo (random view)": 0.9433290978398984,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does a jumping jack in place."
  },
  "011351": {
    "text": "walks forward, turns around, and walks back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6522896957607579,
      "Minus Multimodal Distance": -2.570122003555298,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9210124611854553,
      "MoBERT-F": 0.6558114781780846,
      "MoBERT-N": 0.620770652670644,
      "MoBERT-min(F/N)": 0.620770652670644,
      "MoBERT-max(F/N)": 0.6558114781780846,
      "MotionCritic": -12.251628875732422,
      "VeMo (human-opt view)": 0.9875577288780223,
      "VeMo (max entropy view)": 0.967062818336163,
      "VeMo (min entropy view)": 0.9875577288780223,
      "VeMo (random view)": 0.967062818336163,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, turns around, and walks back."
  },
  "004668": {
    "text": "person is hunched over creeping diagonally down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8807781608795245,
      "Minus Multimodal Distance": -12.28539752960205,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.20068444311618805,
      "MoBERT-F": 0.5491828272971833,
      "MoBERT-N": 0.5756666337767855,
      "MoBERT-min(F/N)": 0.5491828272971833,
      "MoBERT-max(F/N)": 0.5756666337767855,
      "MotionCritic": -8.254729270935059,
      "VeMo (human-opt view)": 0.9628081905557877,
      "VeMo (max entropy view)": 0.9628081905557877,
      "VeMo (min entropy view)": 0.9668574470917077,
      "VeMo (random view)": 0.9628081905557877,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is hunched over, creeping diagonally down."
  },
  "007556": {
    "text": "sits down than brushes off knees",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8043037507326081,
      "Minus Multimodal Distance": -6.370388507843018,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00040931024705059826,
      "MoBERT-F": 0.34411839025663576,
      "MoBERT-N": 0.43150005972619876,
      "MoBERT-min(F/N)": 0.34411839025663576,
      "MoBERT-max(F/N)": 0.43150005972619876,
      "MotionCritic": -7.567018032073975,
      "VeMo (human-opt view)": 0.8081264108352144,
      "VeMo (max entropy view)": 0.8081264108352144,
      "VeMo (min entropy view)": 0.8673469387755102,
      "VeMo (random view)": 0.8081264108352144,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down and then brushes off knees."
  },
  "009252": {
    "text": "the man moves to his right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5107235766836007,
      "Minus Multimodal Distance": -5.956791400909424,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3857664928073063e-05,
      "MoBERT-F": 0.4019524349918656,
      "MoBERT-N": 0.4677615905564436,
      "MoBERT-min(F/N)": 0.4019524349918656,
      "MoBERT-max(F/N)": 0.4677615905564436,
      "MotionCritic": -7.5268683433532715,
      "VeMo (human-opt view)": 0.9362023405058513,
      "VeMo (max entropy view)": 0.8937421251574968,
      "VeMo (min entropy view)": 0.9362023405058513,
      "VeMo (random view)": 0.9362023405058513,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man moves to his right."
  },
  "012559": {
    "text": "a person walking side to side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8793441230191062,
      "Minus Multimodal Distance": -3.823629140853882,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3253269318956882e-05,
      "MoBERT-F": 0.34638064511486377,
      "MoBERT-N": 0.5119773639836351,
      "MoBERT-min(F/N)": 0.34638064511486377,
      "MoBERT-max(F/N)": 0.5119773639836351,
      "MotionCritic": -7.862612247467041,
      "VeMo (human-opt view)": 0.8671808054841473,
      "VeMo (max entropy view)": 0.48534201954397393,
      "VeMo (min entropy view)": 0.8671808054841473,
      "VeMo (random view)": 0.8671808054841473,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking side to side."
  },
  "012554": {
    "text": "a person slowly walked forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.34239908398358343,
      "Minus Multimodal Distance": -8.67681884765625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3988457542145625e-05,
      "MoBERT-F": 0.3990645526060489,
      "MoBERT-N": 0.5254768305381387,
      "MoBERT-min(F/N)": 0.3990645526060489,
      "MoBERT-max(F/N)": 0.5254768305381387,
      "MotionCritic": -5.808572769165039,
      "VeMo (human-opt view)": 0.9914172074523759,
      "VeMo (max entropy view)": 0.9867928005405737,
      "VeMo (min entropy view)": 0.9914172074523759,
      "VeMo (random view)": 0.9867928005405737,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly walked forward."
  },
  "000307": {
    "text": "the figure takes a few slighly hurried steps without raising their arms, it looks they are about to start running but haven't quite yet begun.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.39251701472843165,
      "Minus Multimodal Distance": -6.348809242248535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00018424286099616438,
      "MoBERT-F": 0.42913228365529316,
      "MoBERT-N": 0.5506939122154811,
      "MoBERT-min(F/N)": 0.42913228365529316,
      "MoBERT-max(F/N)": 0.5506939122154811,
      "MotionCritic": -5.556149005889893,
      "VeMo (human-opt view)": 0.9908716663683551,
      "VeMo (max entropy view)": 0.9808192771084338,
      "VeMo (min entropy view)": 0.9908716663683551,
      "VeMo (random view)": 0.9908716663683551,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure takes a few slightly hurried steps without raising their arms. It looks as if they are about to start running but haven't quite begun yet."
  },
  "002950": {
    "text": "a person hops forward with both legs and after a few hops they hop on top of something then back down right after.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.7631699925126947,
      "Minus Multimodal Distance": -4.381792068481445,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9706552028656006,
      "MoBERT-F": 0.7776730482591392,
      "MoBERT-N": 0.8614941014994824,
      "MoBERT-min(F/N)": 0.7776730482591392,
      "MoBERT-max(F/N)": 0.8614941014994824,
      "MotionCritic": -5.419708251953125,
      "VeMo (human-opt view)": 0.06358381502890173,
      "VeMo (max entropy view)": 0.06358381502890173,
      "VeMo (min entropy view)": 0.0006660421779223045,
      "VeMo (random view)": 0.06358381502890173,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person hops forward with both legs. After a few hops, they hop on top of something and then immediately hop back down."
  },
  "007597": {
    "text": "the figure raises its left arm and makes reaching motions as if grabbing for something, and then extends its arms twice.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.29804457902559434,
      "Minus Multimodal Distance": -6.866694927215576,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6664056349545717e-05,
      "MoBERT-F": 0.3095652718783585,
      "MoBERT-N": 0.4258640955981225,
      "MoBERT-min(F/N)": 0.3095652718783585,
      "MoBERT-max(F/N)": 0.4258640955981225,
      "MotionCritic": -8.628093719482422,
      "VeMo (human-opt view)": 0.8669950738916257,
      "VeMo (max entropy view)": 0.8669950738916257,
      "VeMo (min entropy view)": 0.9047817047817048,
      "VeMo (random view)": 0.8669950738916257,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The figure raises its left arm and makes reaching motions as if grabbing for something, and then extends its arms twice."
  },
  "012046": {
    "text": "a person walks on a beam, loses his balance and fall off to his right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8577227337592993,
      "Minus Multimodal Distance": -5.053426265716553,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.170613050926477e-05,
      "MoBERT-F": 0.44287573150443127,
      "MoBERT-N": 0.5391611869806775,
      "MoBERT-min(F/N)": 0.44287573150443127,
      "MoBERT-max(F/N)": 0.5391611869806775,
      "MotionCritic": -4.4331865310668945,
      "VeMo (human-opt view)": 0.09517923362175525,
      "VeMo (max entropy view)": 0.09517923362175525,
      "VeMo (min entropy view)": 0.014938886373924853,
      "VeMo (random view)": 0.014938886373924853,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks on a beam, loses his balance and falls off to his right."
  },
  "004724": {
    "text": "a person shifts around in place like a zombie, raising their arms up and down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8110600047936339,
      "Minus Multimodal Distance": -6.127890586853027,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7902968213311397e-05,
      "MoBERT-F": 0.4356981015694148,
      "MoBERT-N": 0.44545619861087044,
      "MoBERT-min(F/N)": 0.4356981015694148,
      "MoBERT-max(F/N)": 0.44545619861087044,
      "MotionCritic": -4.581050395965576,
      "VeMo (human-opt view)": 0.7047244094488189,
      "VeMo (max entropy view)": 0.6666666666666666,
      "VeMo (min entropy view)": 0.7047244094488189,
      "VeMo (random view)": 0.7047244094488189,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person shuffles around in place like a zombie, raising their arms up and down."
  },
  "005376": {
    "text": "person walks straight in pretty slow phase, turns around and walks in oppsite direction.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5277293576863369,
      "Minus Multimodal Distance": -12.98826789855957,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8775920271873474,
      "MoBERT-F": 0.6731731428871504,
      "MoBERT-N": 0.5694628375387275,
      "MoBERT-min(F/N)": 0.5694628375387275,
      "MoBERT-max(F/N)": 0.6731731428871504,
      "MotionCritic": -21.22779083251953,
      "VeMo (human-opt view)": 0.9602824360105914,
      "VeMo (max entropy view)": 0.8171206225680934,
      "VeMo (min entropy view)": 0.9602824360105914,
      "VeMo (random view)": 0.8171206225680934,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks straight at a pretty slow pace, turns around, and walks in the opposite direction."
  },
  "002658": {
    "text": "person swings their right arm and then has an injury on the elbow.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.528691437812236,
      "Minus Multimodal Distance": -2.6581127643585205,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.0921688448870555e-05,
      "MoBERT-F": 0.3243220076335117,
      "MoBERT-N": 0.4577955596505849,
      "MoBERT-min(F/N)": 0.3243220076335117,
      "MoBERT-max(F/N)": 0.4577955596505849,
      "MotionCritic": -6.978346347808838,
      "VeMo (human-opt view)": 0.5623003194888179,
      "VeMo (max entropy view)": 0.4846153846153846,
      "VeMo (min entropy view)": 0.5623003194888179,
      "VeMo (random view)": 0.4846153846153846,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person swings their right arm and then sustains an injury to the elbow."
  },
  "006337": {
    "text": "a figure tip toes around while walking in a slolam like motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6910577604099755,
      "Minus Multimodal Distance": -9.522650718688965,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.555900573497638e-05,
      "MoBERT-F": 0.4712360822800302,
      "MoBERT-N": 0.4159452120225292,
      "MoBERT-min(F/N)": 0.4159452120225292,
      "MoBERT-max(F/N)": 0.4712360822800302,
      "MotionCritic": -2.6485722064971924,
      "VeMo (human-opt view)": 0.6666666666666666,
      "VeMo (max entropy view)": 0.6666666666666666,
      "VeMo (min entropy view)": 0.6924493554327809,
      "VeMo (random view)": 0.6666666666666666,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure tiptoes around while walking in a slalom - like motion."
  },
  "001835": {
    "text": "a person standing up strikes their hands together well above their head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4163331429945234,
      "Minus Multimodal Distance": -6.269257068634033,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0006129126413725317,
      "MoBERT-F": 0.5557859709723904,
      "MoBERT-N": 0.49875448659980526,
      "MoBERT-min(F/N)": 0.49875448659980526,
      "MoBERT-max(F/N)": 0.5557859709723904,
      "MotionCritic": -10.995769500732422,
      "VeMo (human-opt view)": 0.9924190213645762,
      "VeMo (max entropy view)": 0.9924190213645762,
      "VeMo (min entropy view)": 0.9937081388268724,
      "VeMo (random view)": 0.9924190213645762,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing up strikes their hands together well above their head."
  },
  "013344": {
    "text": "in a fighting stance, person punches downward with their left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5424845440133087,
      "Minus Multimodal Distance": -7.071192264556885,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4639502700883895e-05,
      "MoBERT-F": 0.26589767546369464,
      "MoBERT-N": 0.35577578655529773,
      "MoBERT-min(F/N)": 0.26589767546369464,
      "MoBERT-max(F/N)": 0.35577578655529773,
      "MotionCritic": -5.373310089111328,
      "VeMo (human-opt view)": 0.8738229755178908,
      "VeMo (max entropy view)": 0.8267716535433071,
      "VeMo (min entropy view)": 0.8738229755178908,
      "VeMo (random view)": 0.8267716535433071,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "In a fighting stance, a person punches downward with their left hand."
  },
  "003138": {
    "text": "a person opens a door and appears to be swimming.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5563011053841593,
      "Minus Multimodal Distance": -11.142293930053711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.6955385187175125e-05,
      "MoBERT-F": 0.324778093978993,
      "MoBERT-N": 0.3894123310373652,
      "MoBERT-min(F/N)": 0.324778093978993,
      "MoBERT-max(F/N)": 0.3894123310373652,
      "MotionCritic": -10.372699737548828,
      "VeMo (human-opt view)": 0.0010996417296300237,
      "VeMo (max entropy view)": 0.001324371200744751,
      "VeMo (min entropy view)": 0.0010996417296300237,
      "VeMo (random view)": 0.0010996417296300237,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person opens a door and appears to be swimming."
  },
  "012799": {
    "text": "a person is shooting a basketball with both hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8668608583710695,
      "Minus Multimodal Distance": -10.765555381774902,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.155760325375013e-05,
      "MoBERT-F": 0.3187563359140413,
      "MoBERT-N": 0.37133103257036143,
      "MoBERT-min(F/N)": 0.3187563359140413,
      "MoBERT-max(F/N)": 0.37133103257036143,
      "MotionCritic": -4.7976179122924805,
      "VeMo (human-opt view)": 5.16263879817113e-05,
      "VeMo (max entropy view)": 0.34782608695652173,
      "VeMo (min entropy view)": 5.16263879817113e-05,
      "VeMo (random view)": 0.34782608695652173,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is shooting a basketball with both hands."
  },
  "008317": {
    "text": "a person walks straight and turns to the right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2951043167782813,
      "Minus Multimodal Distance": -13.32645034790039,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.30755267693894e-05,
      "MoBERT-F": 0.35637682224002204,
      "MoBERT-N": 0.5467357662432217,
      "MoBERT-min(F/N)": 0.35637682224002204,
      "MoBERT-max(F/N)": 0.5467357662432217,
      "MotionCritic": -2.1627542972564697,
      "VeMo (human-opt view)": 0.9466537342386033,
      "VeMo (max entropy view)": 0.9466537342386033,
      "VeMo (min entropy view)": 0.9688179840464104,
      "VeMo (random view)": 0.9688179840464104,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks straight and turns to the right."
  },
  "005951": {
    "text": "a person stands relaxed seems to observe something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3302723727175764,
      "Minus Multimodal Distance": -4.087207794189453,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.458438368397765e-05,
      "MoBERT-F": 0.3125567272301065,
      "MoBERT-N": 0.358131782245576,
      "MoBERT-min(F/N)": 0.3125567272301065,
      "MoBERT-max(F/N)": 0.358131782245576,
      "MotionCritic": -6.498644828796387,
      "VeMo (human-opt view)": 0.9323583180987203,
      "VeMo (max entropy view)": 0.9194601654331737,
      "VeMo (min entropy view)": 0.9323583180987203,
      "VeMo (random view)": 0.9194601654331737,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands relaxed and seems to be observing something."
  },
  "014565": {
    "text": "person walks forward, turns and walks back",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6038434412060376,
      "Minus Multimodal Distance": -2.6322526931762695,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.1779738813638687,
      "MoBERT-F": 0.5753365110882236,
      "MoBERT-N": 0.600272981098008,
      "MoBERT-min(F/N)": 0.5753365110882236,
      "MoBERT-max(F/N)": 0.600272981098008,
      "MotionCritic": -11.974693298339844,
      "VeMo (human-opt view)": 0.9604365620736699,
      "VeMo (max entropy view)": 0.9604365620736699,
      "VeMo (min entropy view)": 0.9668574470917077,
      "VeMo (random view)": 0.9604365620736699,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, turns around and walks back."
  },
  "011589": {
    "text": "laying down and crawling forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4094864275101515,
      "Minus Multimodal Distance": -3.2786073684692383,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.3886611820198596e-05,
      "MoBERT-F": 0.4022391601820262,
      "MoBERT-N": 0.465138158869889,
      "MoBERT-min(F/N)": 0.4022391601820262,
      "MoBERT-max(F/N)": 0.465138158869889,
      "MotionCritic": -6.980914115905762,
      "VeMo (human-opt view)": 0.98302985259507,
      "VeMo (max entropy view)": 0.9198184568835098,
      "VeMo (min entropy view)": 0.98302985259507,
      "VeMo (random view)": 0.98302985259507,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is lying down and crawling forward."
  },
  "006652": {
    "text": "the man is doing starjumps",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6719577917521566,
      "Minus Multimodal Distance": -7.846965789794922,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9898423552513123,
      "MoBERT-F": 0.7902726443851718,
      "MoBERT-N": 0.7749700817487549,
      "MoBERT-min(F/N)": 0.7749700817487549,
      "MoBERT-max(F/N)": 0.7902726443851718,
      "MotionCritic": -6.794870376586914,
      "VeMo (human-opt view)": 0.7318007662835249,
      "VeMo (max entropy view)": 0.7318007662835249,
      "VeMo (min entropy view)": 0.8171206225680934,
      "VeMo (random view)": 0.7318007662835249,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is doing star jumps."
  },
  "009867": {
    "text": "bending down and moving hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.49781537653484703,
      "Minus Multimodal Distance": -3.6360809803009033,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.4109663963317871,
      "MoBERT-F": 0.6168773360977089,
      "MoBERT-N": 0.6586082236759265,
      "MoBERT-min(F/N)": 0.6168773360977089,
      "MoBERT-max(F/N)": 0.6586082236759265,
      "MotionCritic": -6.916032314300537,
      "VeMo (human-opt view)": 0.9978134803688563,
      "VeMo (max entropy view)": 0.955411510508741,
      "VeMo (min entropy view)": 0.9978134803688563,
      "VeMo (random view)": 0.9978134803688563,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is bending down and moving hands."
  },
  "009919": {
    "text": "a person is raising their arms in the air with their legs bent as if lifting weights.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7780280593431401,
      "Minus Multimodal Distance": -8.696760177612305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.505309385014698e-05,
      "MoBERT-F": 0.4446700501591262,
      "MoBERT-N": 0.4869580386061792,
      "MoBERT-min(F/N)": 0.4446700501591262,
      "MoBERT-max(F/N)": 0.4869580386061792,
      "MotionCritic": -2.9431190490722656,
      "VeMo (human-opt view)": 0.1065989847715736,
      "VeMo (max entropy view)": 0.17318435754189945,
      "VeMo (min entropy view)": 0.1065989847715736,
      "VeMo (random view)": 0.17318435754189945,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is raising their arms in the air with their legs bent, as if they are lifting weights."
  },
  "008564": {
    "text": "a person is wrinkling a paper in their hands and moving their feet from side to side",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6036859489574921,
      "Minus Multimodal Distance": -7.035995960235596,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.321834446978755e-05,
      "MoBERT-F": 0.35276709947574014,
      "MoBERT-N": 0.40625901457880387,
      "MoBERT-min(F/N)": 0.35276709947574014,
      "MoBERT-max(F/N)": 0.40625901457880387,
      "MotionCritic": -6.382319927215576,
      "VeMo (human-opt view)": 0.010977620730270907,
      "VeMo (max entropy view)": 0.010977620730270907,
      "VeMo (min entropy view)": 0.0019320239467003977,
      "VeMo (random view)": 0.0019320239467003977,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is wrinkling a piece of paper in their hands and moving their feet from side to side."
  },
  "012127": {
    "text": "a man staggers backwards from a standing posture, swinging his arms, before ending in a standing posture.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3673485691557467,
      "Minus Multimodal Distance": -9.144791603088379,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.994150698184967,
      "MoBERT-F": 0.6887101568724736,
      "MoBERT-N": 0.5530552221550176,
      "MoBERT-min(F/N)": 0.5530552221550176,
      "MoBERT-max(F/N)": 0.6887101568724736,
      "MotionCritic": -6.54386043548584,
      "VeMo (human-opt view)": 0.32113821138211385,
      "VeMo (max entropy view)": 0.3920595533498759,
      "VeMo (min entropy view)": 0.32113821138211385,
      "VeMo (random view)": 0.32113821138211385,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man staggers backwards from a standing position, swinging his arms, before ending up in a standing position."
  },
  "006433": {
    "text": "a person lifts both arms out to their side and runs forward in a figure 8 pattern.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2166001022843518,
      "Minus Multimodal Distance": -4.9695329666137695,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0032046958804130554,
      "MoBERT-F": 0.5230012350699426,
      "MoBERT-N": 0.5681071838054509,
      "MoBERT-min(F/N)": 0.5230012350699426,
      "MoBERT-max(F/N)": 0.5681071838054509,
      "MotionCritic": -7.004248142242432,
      "VeMo (human-opt view)": 0.0002612050545319077,
      "VeMo (max entropy view)": 0.0024678529679179116,
      "VeMo (min entropy view)": 0.0002612050545319077,
      "VeMo (random view)": 0.0024678529679179116,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts both arms out to the sides and runs forward in a figure-8 pattern."
  },
  "003481": {
    "text": "a person steps forward and kicks aggressively with their right leg, then quickly squats and jumps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7413294537228083,
      "Minus Multimodal Distance": -5.546204566955566,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9677146077156067,
      "MoBERT-F": 0.7309013306141879,
      "MoBERT-N": 0.6475418207028648,
      "MoBERT-min(F/N)": 0.6475418207028648,
      "MoBERT-max(F/N)": 0.7309013306141879,
      "MotionCritic": -9.145050048828125,
      "VeMo (human-opt view)": 0.9554785390562517,
      "VeMo (max entropy view)": 0.8513853904282116,
      "VeMo (min entropy view)": 0.9554785390562517,
      "VeMo (random view)": 0.9554785390562517,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps forward and kicks aggressively with their right leg, then quickly squats and jumps."
  },
  "007473": {
    "text": "a person throwing a punch upwards similar to a uppercut motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9152048175543164,
      "Minus Multimodal Distance": -5.868484020233154,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0007100173970684409,
      "MoBERT-F": 0.5029734186880133,
      "MoBERT-N": 0.5423619429413186,
      "MoBERT-min(F/N)": 0.5029734186880133,
      "MoBERT-max(F/N)": 0.5423619429413186,
      "MotionCritic": -6.862800121307373,
      "VeMo (human-opt view)": 0.7879924953095685,
      "VeMo (max entropy view)": 0.7879924953095685,
      "VeMo (min entropy view)": 0.8265682656826568,
      "VeMo (random view)": 0.7879924953095685,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person throwing a punch upwards, similar to an uppercut motion."
  },
  "003441": {
    "text": "a person appears to be raising both his arms with difficulty.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.40544406641199815,
      "Minus Multimodal Distance": -9.653335571289062,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8944486984983087e-05,
      "MoBERT-F": 0.37917760190308414,
      "MoBERT-N": 0.4387666954617124,
      "MoBERT-min(F/N)": 0.37917760190308414,
      "MoBERT-max(F/N)": 0.4387666954617124,
      "MotionCritic": -6.541163921356201,
      "VeMo (human-opt view)": 0.5769230769230769,
      "VeMo (max entropy view)": 0.5769230769230769,
      "VeMo (min entropy view)": 0.6783625730994152,
      "VeMo (random view)": 0.5769230769230769,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person appears to be raising both his arms with difficulty."
  },
  "005457": {
    "text": "person stands still with both arms raised at shoulder height",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.20912207423427123,
      "Minus Multimodal Distance": -5.0273756980896,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.094222120125778e-05,
      "MoBERT-F": 0.38096440408762283,
      "MoBERT-N": 0.46773468225412745,
      "MoBERT-min(F/N)": 0.38096440408762283,
      "MoBERT-max(F/N)": 0.46773468225412745,
      "MotionCritic": -8.09535026550293,
      "VeMo (human-opt view)": 0.9959298429148209,
      "VeMo (max entropy view)": 0.994797869006581,
      "VeMo (min entropy view)": 0.9959298429148209,
      "VeMo (random view)": 0.9959298429148209,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still with both arms raised to shoulder height."
  },
  "008696": {
    "text": "a person stands on one leg in yoga pose.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5747873237172832,
      "Minus Multimodal Distance": -9.609789848327637,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4411823687842116e-05,
      "MoBERT-F": 0.4303230668364929,
      "MoBERT-N": 0.4976317861014717,
      "MoBERT-min(F/N)": 0.4303230668364929,
      "MoBERT-max(F/N)": 0.4976317861014717,
      "MotionCritic": -7.0052313804626465,
      "VeMo (human-opt view)": 0.8597748208802457,
      "VeMo (max entropy view)": 0.8356374807987711,
      "VeMo (min entropy view)": 0.8597748208802457,
      "VeMo (random view)": 0.8597748208802457,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands on one leg in a yoga pose."
  },
  "003783": {
    "text": "a person lunges over and gets back up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8494197408037067,
      "Minus Multimodal Distance": -8.147965431213379,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7891322374343872,
      "MoBERT-F": 0.5582485642461454,
      "MoBERT-N": 0.42445960582655373,
      "MoBERT-min(F/N)": 0.42445960582655373,
      "MoBERT-max(F/N)": 0.5582485642461454,
      "MotionCritic": -5.4450273513793945,
      "VeMo (human-opt view)": 0.9707163237723382,
      "VeMo (max entropy view)": 0.9707163237723382,
      "VeMo (min entropy view)": 0.9770491803278688,
      "VeMo (random view)": 0.9770491803278688,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lunges forward and then gets back up."
  },
  "006385": {
    "text": "a person standing still shuffles to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.21831833418358507,
      "Minus Multimodal Distance": -5.835124969482422,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3638440325157717e-05,
      "MoBERT-F": 0.3626824279508833,
      "MoBERT-N": 0.45917197276959126,
      "MoBERT-min(F/N)": 0.3626824279508833,
      "MoBERT-max(F/N)": 0.45917197276959126,
      "MotionCritic": -5.5855488777160645,
      "VeMo (human-opt view)": 0.939896654881697,
      "VeMo (max entropy view)": 0.8806660499537465,
      "VeMo (min entropy view)": 0.939896654881697,
      "VeMo (random view)": 0.939896654881697,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing still shuffles to the left."
  },
  "004050": {
    "text": "the person to march forward and turned left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3550383437152848,
      "Minus Multimodal Distance": -4.590670108795166,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8713931897073053e-05,
      "MoBERT-F": 0.5516564114754781,
      "MoBERT-N": 0.6002870378249289,
      "MoBERT-min(F/N)": 0.5516564114754781,
      "MoBERT-max(F/N)": 0.6002870378249289,
      "MotionCritic": -11.98143482208252,
      "VeMo (human-opt view)": 0.6078886310904872,
      "VeMo (max entropy view)": 0.6078886310904872,
      "VeMo (min entropy view)": 0.7879746835443038,
      "VeMo (random view)": 0.6078886310904872,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person marched forward and turned left."
  },
  "010618": {
    "text": "person is throwing a ball hard.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7040116047465238,
      "Minus Multimodal Distance": -6.138907432556152,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7953687906265259,
      "MoBERT-F": 0.7329435936233655,
      "MoBERT-N": 0.7109219589624778,
      "MoBERT-min(F/N)": 0.7109219589624778,
      "MoBERT-max(F/N)": 0.7329435936233655,
      "MotionCritic": -4.471678256988525,
      "VeMo (human-opt view)": 0.2225201072386059,
      "VeMo (max entropy view)": 0.46887966804979253,
      "VeMo (min entropy view)": 0.2225201072386059,
      "VeMo (random view)": 0.2225201072386059,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is throwing a ball hard."
  },
  "004134": {
    "text": "the man stretches his arms.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6225638858592394,
      "Minus Multimodal Distance": -8.289240837097168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00010797077266033739,
      "MoBERT-F": 0.5851359511939944,
      "MoBERT-N": 0.5970232800208711,
      "MoBERT-min(F/N)": 0.5851359511939944,
      "MoBERT-max(F/N)": 0.5970232800208711,
      "MotionCritic": -7.087886810302734,
      "VeMo (human-opt view)": 0.9979594944419065,
      "VeMo (max entropy view)": 0.9979594944419065,
      "VeMo (min entropy view)": 0.9990880654239516,
      "VeMo (random view)": 0.9990880654239516,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man stretches his arms."
  },
  "005485": {
    "text": "a man is walking forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3494560291349916,
      "Minus Multimodal Distance": -7.257198333740234,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.096057818969712e-05,
      "MoBERT-F": 0.49433417508731836,
      "MoBERT-N": 0.5559520431137961,
      "MoBERT-min(F/N)": 0.49433417508731836,
      "MoBERT-max(F/N)": 0.5559520431137961,
      "MotionCritic": -5.2432074546813965,
      "VeMo (human-opt view)": 0.9755498721227621,
      "VeMo (max entropy view)": 0.9755498721227621,
      "VeMo (min entropy view)": 0.9859407717618905,
      "VeMo (random view)": 0.9859407717618905,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is walking forward."
  },
  "009584": {
    "text": "a person is sneaking around",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7490456679574304,
      "Minus Multimodal Distance": -7.74791955947876,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.002975527895614505,
      "MoBERT-F": 0.507099299547656,
      "MoBERT-N": 0.672653048011157,
      "MoBERT-min(F/N)": 0.507099299547656,
      "MoBERT-max(F/N)": 0.672653048011157,
      "MotionCritic": -6.167942047119141,
      "VeMo (human-opt view)": 0.9526627218934911,
      "VeMo (max entropy view)": 0.9049773755656109,
      "VeMo (min entropy view)": 0.9526627218934911,
      "VeMo (random view)": 0.9049773755656109,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is sneaking around."
  },
  "013647": {
    "text": "the person is pacing back-and-forth.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8450915843878365,
      "Minus Multimodal Distance": -6.039450645446777,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.11159974336624146,
      "MoBERT-F": 0.6555559159772892,
      "MoBERT-N": 0.6955122894586081,
      "MoBERT-min(F/N)": 0.6555559159772892,
      "MoBERT-max(F/N)": 0.6955122894586081,
      "MotionCritic": -13.393754959106445,
      "VeMo (human-opt view)": 0.9770423027619158,
      "VeMo (max entropy view)": 0.9770423027619158,
      "VeMo (min entropy view)": 0.9808429118773946,
      "VeMo (random view)": 0.9808429118773946,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is pacing back and forth."
  },
  "004708": {
    "text": "the person steps a little wider than shoulder width apart first with their right foot, then with their left before squatting 4 times.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7678190624732977,
      "Minus Multimodal Distance": -7.516376495361328,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.329217285383493e-05,
      "MoBERT-F": 0.44761866073335316,
      "MoBERT-N": 0.5435741101702702,
      "MoBERT-min(F/N)": 0.44761866073335316,
      "MoBERT-max(F/N)": 0.5435741101702702,
      "MotionCritic": -1.00382661819458,
      "VeMo (human-opt view)": 0.8994708994708994,
      "VeMo (max entropy view)": 0.8598848368522073,
      "VeMo (min entropy view)": 0.8994708994708994,
      "VeMo (random view)": 0.8994708994708994,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person steps a little wider than shoulder-width apart, first with their right foot, then with their left, before squatting 4 times."
  },
  "000389": {
    "text": "a standing man loses a little bit of balance and his upper body leans and shakes toward his left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.18188122783179947,
      "Minus Multimodal Distance": -8.828605651855469,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3875692932051606e-05,
      "MoBERT-F": 0.36964317700572175,
      "MoBERT-N": 0.4259625536360087,
      "MoBERT-min(F/N)": 0.36964317700572175,
      "MoBERT-max(F/N)": 0.4259625536360087,
      "MotionCritic": -3.097778081893921,
      "VeMo (human-opt view)": 0.3626373626373626,
      "VeMo (max entropy view)": 0.3626373626373626,
      "VeMo (min entropy view)": 0.6924315619967794,
      "VeMo (random view)": 0.3626373626373626,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A standing man loses a little bit of balance, and his upper body leans and shakes to his left."
  },
  "013253": {
    "text": "a person is bent head over toes jumping and throwing arms wildly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2903392127256472,
      "Minus Multimodal Distance": -2.9437382221221924,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9243261218070984,
      "MoBERT-F": 0.7839879941991756,
      "MoBERT-N": 0.701073484430473,
      "MoBERT-min(F/N)": 0.701073484430473,
      "MoBERT-max(F/N)": 0.7839879941991756,
      "MotionCritic": -7.564637660980225,
      "VeMo (human-opt view)": 0.37777777777777777,
      "VeMo (max entropy view)": 0.5786802030456852,
      "VeMo (min entropy view)": 0.37777777777777777,
      "VeMo (random view)": 0.5786802030456852,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is bent head over toes, jumping and throwing their arms wildly."
  },
  "014352": {
    "text": "a person raises their arms and throws them back down in frustration",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3911930816722564,
      "Minus Multimodal Distance": -3.314051628112793,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00022534647723659873,
      "MoBERT-F": 0.5205471205354818,
      "MoBERT-N": 0.5211735992021425,
      "MoBERT-min(F/N)": 0.5205471205354818,
      "MoBERT-max(F/N)": 0.5211735992021425,
      "MotionCritic": -6.699121475219727,
      "VeMo (human-opt view)": 0.9706591070163005,
      "VeMo (max entropy view)": 0.9706591070163005,
      "VeMo (min entropy view)": 0.9850567842199641,
      "VeMo (random view)": 0.9850567842199641,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises their arms and then throws them back down in frustration."
  },
  "006692": {
    "text": "a person stands with their arms stretched out then sits down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8471095906718256,
      "Minus Multimodal Distance": -7.444895267486572,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.09895393997430801,
      "MoBERT-F": 0.5713120657991687,
      "MoBERT-N": 0.5199606076057409,
      "MoBERT-min(F/N)": 0.5199606076057409,
      "MoBERT-max(F/N)": 0.5713120657991687,
      "MotionCritic": -8.289738655090332,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.8736196319018404,
      "VeMo (random view)": 0.5,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands with their arms stretched out, then sits down."
  },
  "005668": {
    "text": "the person is hand mixing dough to bake something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2995698421343632,
      "Minus Multimodal Distance": -9.318448066711426,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.99055786349345e-05,
      "MoBERT-F": 0.2610454848070291,
      "MoBERT-N": 0.3476329851704872,
      "MoBERT-min(F/N)": 0.2610454848070291,
      "MoBERT-max(F/N)": 0.3476329851704872,
      "MotionCritic": -4.18288516998291,
      "VeMo (human-opt view)": 0.00040436140290249544,
      "VeMo (max entropy view)": 0.00040436140290249544,
      "VeMo (min entropy view)": 1.0758187784553531e-05,
      "VeMo (random view)": 1.0758187784553531e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is hand-mixing dough to bake something."
  },
  "010199": {
    "text": "a figure wind mill kicks around the mosh pit.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2792338339888938,
      "Minus Multimodal Distance": -5.542825698852539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9963853359222412,
      "MoBERT-F": 0.8410210152212021,
      "MoBERT-N": 0.7812087501780047,
      "MoBERT-min(F/N)": 0.7812087501780047,
      "MoBERT-max(F/N)": 0.8410210152212021,
      "MotionCritic": -2.6287500858306885,
      "VeMo (human-opt view)": 0.5925925925925926,
      "VeMo (max entropy view)": 0.5925925925925926,
      "VeMo (min entropy view)": 0.6216216216216216,
      "VeMo (random view)": 0.5925925925925926,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure does windmill kicks around the mosh pit."
  },
  "010270": {
    "text": "a person is jogging on a treadmill",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5974127319914742,
      "Minus Multimodal Distance": -5.723470211029053,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7230164050706662e-05,
      "MoBERT-F": 0.4830398635077357,
      "MoBERT-N": 0.564589962518452,
      "MoBERT-min(F/N)": 0.4830398635077357,
      "MoBERT-max(F/N)": 0.564589962518452,
      "MotionCritic": -8.367162704467773,
      "VeMo (human-opt view)": 0.7985480943738656,
      "VeMo (max entropy view)": 0.5628415300546448,
      "VeMo (min entropy view)": 0.7985480943738656,
      "VeMo (random view)": 0.5628415300546448,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is jogging on a treadmill."
  },
  "006332": {
    "text": "a man walks in a clockwise circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7298409674066064,
      "Minus Multimodal Distance": -8.406964302062988,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0008702260674908757,
      "MoBERT-F": 0.5184425526233032,
      "MoBERT-N": 0.6291878460526666,
      "MoBERT-min(F/N)": 0.5184425526233032,
      "MoBERT-max(F/N)": 0.6291878460526666,
      "MotionCritic": -9.335809707641602,
      "VeMo (human-opt view)": 0.8174204355108877,
      "VeMo (max entropy view)": 0.8174204355108877,
      "VeMo (min entropy view)": 0.8263888888888888,
      "VeMo (random view)": 0.8174204355108877,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks in a clockwise circle."
  },
  "000021": {
    "text": "person is walking normally in a circle",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5397026497475481,
      "Minus Multimodal Distance": -7.490419864654541,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.008463161997497082,
      "MoBERT-F": 0.5926417697434215,
      "MoBERT-N": 0.6335829131839397,
      "MoBERT-min(F/N)": 0.5926417697434215,
      "MoBERT-max(F/N)": 0.6335829131839397,
      "MotionCritic": -10.68444538116455,
      "VeMo (human-opt view)": 0.7311827956989247,
      "VeMo (max entropy view)": 0.7302158273381295,
      "VeMo (min entropy view)": 0.7311827956989247,
      "VeMo (random view)": 0.7311827956989247,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking normally in a circle."
  },
  "014436": {
    "text": "a stick figure jumps side ways.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5101889516678039,
      "Minus Multimodal Distance": -5.651890754699707,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5809328556060791,
      "MoBERT-F": 0.7180825502224841,
      "MoBERT-N": 0.6730600065855151,
      "MoBERT-min(F/N)": 0.6730600065855151,
      "MoBERT-max(F/N)": 0.7180825502224841,
      "MotionCritic": -3.6103343963623047,
      "VeMo (human-opt view)": 0.4847161572052402,
      "VeMo (max entropy view)": 0.4847161572052402,
      "VeMo (min entropy view)": 0.3769633507853403,
      "VeMo (random view)": 0.3769633507853403,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure jumps sideways."
  },
  "008399": {
    "text": "shaking legs side to side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.41885619349905595,
      "Minus Multimodal Distance": -10.3693265914917,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6027084459201433e-05,
      "MoBERT-F": 0.439013747523738,
      "MoBERT-N": 0.6089526944472774,
      "MoBERT-min(F/N)": 0.439013747523738,
      "MoBERT-max(F/N)": 0.6089526944472774,
      "MotionCritic": -2.7775232791900635,
      "VeMo (human-opt view)": 0.6666666666666666,
      "VeMo (max entropy view)": 0.6666666666666666,
      "VeMo (min entropy view)": 0.6924493554327809,
      "VeMo (random view)": 0.6924493554327809,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is shaking legs side to side."
  },
  "000679": {
    "text": "drunk walking animation turning around",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9094466904805626,
      "Minus Multimodal Distance": -4.171241283416748,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.974036760861054e-05,
      "MoBERT-F": 0.4550063693206168,
      "MoBERT-N": 0.5189072961198463,
      "MoBERT-min(F/N)": 0.4550063693206168,
      "MoBERT-max(F/N)": 0.5189072961198463,
      "MotionCritic": -10.198461532592773,
      "VeMo (human-opt view)": 0.9432943294329433,
      "VeMo (max entropy view)": 0.9152542372881356,
      "VeMo (min entropy view)": 0.9432943294329433,
      "VeMo (random view)": 0.9152542372881356,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Drunk walking animation, turning around."
  },
  "002168": {
    "text": "a person stands up straight from a bent over position, touches his head with his left arm, then bends his torso to the right and swings forward with his arms dangling in front of him. i",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6434760154558605,
      "Minus Multimodal Distance": -4.011138916015625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.604445737437345e-05,
      "MoBERT-F": 0.4176138503862212,
      "MoBERT-N": 0.44931123463897116,
      "MoBERT-min(F/N)": 0.4176138503862212,
      "MoBERT-max(F/N)": 0.44931123463897116,
      "MotionCritic": -5.0383710861206055,
      "VeMo (human-opt view)": 0.6657534246575343,
      "VeMo (max entropy view)": 0.6657534246575343,
      "VeMo (min entropy view)": 0.8667496886674969,
      "VeMo (random view)": 0.8667496886674969,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands up straight from a bent-over position, touches their head with their left arm, then bends their torso to the right and swings forward with their arms dangling in front of them."
  },
  "007423": {
    "text": "a figure seems to be gesturing for people to run past or through or under their other arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7076097084271867,
      "Minus Multimodal Distance": -12.591119766235352,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7202739119529724,
      "MoBERT-F": 0.6727967837670994,
      "MoBERT-N": 0.5961094546698451,
      "MoBERT-min(F/N)": 0.5961094546698451,
      "MoBERT-max(F/N)": 0.6727967837670994,
      "MotionCritic": -5.249857425689697,
      "VeMo (human-opt view)": 0.07560627674750357,
      "VeMo (max entropy view)": 0.15619967793880837,
      "VeMo (min entropy view)": 0.07560627674750357,
      "VeMo (random view)": 0.15619967793880837,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure seems to be gesturing for people to run past, through, or under their other arm."
  },
  "000507": {
    "text": "a person while sitting on the floor throws with his right arm and then stands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7111653618239948,
      "Minus Multimodal Distance": -9.20439338684082,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0008435308118350804,
      "MoBERT-F": 0.35450544869560446,
      "MoBERT-N": 0.37747195515510495,
      "MoBERT-min(F/N)": 0.35450544869560446,
      "MoBERT-max(F/N)": 0.37747195515510495,
      "MotionCritic": -7.424585342407227,
      "VeMo (human-opt view)": 0.826722338204593,
      "VeMo (max entropy view)": 0.826722338204593,
      "VeMo (min entropy view)": 0.9044967880085653,
      "VeMo (random view)": 0.9044967880085653,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person, while sitting on the floor, throws with his right arm and then stands up."
  },
  "012639": {
    "text": "a person has their right hand on their head and walks around in a circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.953556073503839,
      "Minus Multimodal Distance": -4.690010070800781,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0024277279153466225,
      "MoBERT-F": 0.513932724390137,
      "MoBERT-N": 0.4031735292240407,
      "MoBERT-min(F/N)": 0.4031735292240407,
      "MoBERT-max(F/N)": 0.513932724390137,
      "MotionCritic": -7.118940830230713,
      "VeMo (human-opt view)": 0.9362928797924474,
      "VeMo (max entropy view)": 0.909952606635071,
      "VeMo (min entropy view)": 0.9362928797924474,
      "VeMo (random view)": 0.9362928797924474,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person has their right hand on their head and walks around in a circle."
  },
  "007232": {
    "text": "a shorter figure jumps and high fives someone taller than him out of excitement",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6184056461296958,
      "Minus Multimodal Distance": -9.541090965270996,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9781412482261658,
      "MoBERT-F": 0.7864831499725493,
      "MoBERT-N": 0.7914988528945125,
      "MoBERT-min(F/N)": 0.7864831499725493,
      "MoBERT-max(F/N)": 0.7914988528945125,
      "MotionCritic": -2.450974941253662,
      "VeMo (human-opt view)": 3.140115954309986e-05,
      "VeMo (max entropy view)": 3.537581359511954e-05,
      "VeMo (min entropy view)": 3.140115954309986e-05,
      "VeMo (random view)": 3.537581359511954e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A shorter figure jumps and high-fives someone taller than him out of excitement."
  },
  "003020": {
    "text": "a man slowly sways from side to side, sightly bending his knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.46360233761554054,
      "Minus Multimodal Distance": -9.235753059387207,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.281267734360881e-05,
      "MoBERT-F": 0.2861017283024051,
      "MoBERT-N": 0.457259746238427,
      "MoBERT-min(F/N)": 0.2861017283024051,
      "MoBERT-max(F/N)": 0.457259746238427,
      "MotionCritic": -1.081175684928894,
      "VeMo (human-opt view)": 0.39303482587064675,
      "VeMo (max entropy view)": 0.5623342175066313,
      "VeMo (min entropy view)": 0.39303482587064675,
      "VeMo (random view)": 0.39303482587064675,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man slowly sways from side to side, slightly bending his knees."
  },
  "013765": {
    "text": "a person moves around in a random fashion, trying to dodge something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0789816966197496,
      "Minus Multimodal Distance": -6.810286045074463,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9701340198516846,
      "MoBERT-F": 0.7717078171797371,
      "MoBERT-N": 0.5613067324671271,
      "MoBERT-min(F/N)": 0.5613067324671271,
      "MoBERT-max(F/N)": 0.7717078171797371,
      "MotionCritic": -11.879618644714355,
      "VeMo (human-opt view)": 0.8076923076923077,
      "VeMo (max entropy view)": 0.8076923076923077,
      "VeMo (min entropy view)": 0.8349514563106796,
      "VeMo (random view)": 0.8349514563106796,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves around randomly, trying to dodge something."
  },
  "003547": {
    "text": "a person tries to screw something large into place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5211716872450797,
      "Minus Multimodal Distance": -5.56899356842041,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.044251884100959e-05,
      "MoBERT-F": 0.3251861973551696,
      "MoBERT-N": 0.40816478106887966,
      "MoBERT-min(F/N)": 0.3251861973551696,
      "MoBERT-max(F/N)": 0.40816478106887966,
      "MotionCritic": -6.2344255447387695,
      "VeMo (human-opt view)": 0.008083632164346927,
      "VeMo (max entropy view)": 0.03748597081930415,
      "VeMo (min entropy view)": 0.008083632164346927,
      "VeMo (random view)": 0.008083632164346927,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person tries to screw something large into place."
  },
  "004320": {
    "text": "a man walks forward slowly, then turns around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6764929042282162,
      "Minus Multimodal Distance": -10.87791919708252,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.334921737201512e-05,
      "MoBERT-F": 0.3604432458524088,
      "MoBERT-N": 0.5075269023222904,
      "MoBERT-min(F/N)": 0.3604432458524088,
      "MoBERT-max(F/N)": 0.5075269023222904,
      "MotionCritic": -12.729927062988281,
      "VeMo (human-opt view)": 0.9903115633930438,
      "VeMo (max entropy view)": 0.8871595330739299,
      "VeMo (min entropy view)": 0.9903115633930438,
      "VeMo (random view)": 0.9903115633930438,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward slowly, then turns around."
  },
  "010917": {
    "text": "standing on one leg and swinging it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6293309701359205,
      "Minus Multimodal Distance": -4.2519850730896,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.3915933676762506e-05,
      "MoBERT-F": 0.5253581443954809,
      "MoBERT-N": 0.5936400953956386,
      "MoBERT-min(F/N)": 0.5253581443954809,
      "MoBERT-max(F/N)": 0.5936400953956386,
      "MotionCritic": -9.050045013427734,
      "VeMo (human-opt view)": 0.9908416921064108,
      "VeMo (max entropy view)": 0.9896547969073287,
      "VeMo (min entropy view)": 0.9908416921064108,
      "VeMo (random view)": 0.9896547969073287,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing on one leg and swinging it."
  },
  "010039": {
    "text": "person walks forwards straight while stumbling",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4493046784008777,
      "Minus Multimodal Distance": -9.823214530944824,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00012111001706216484,
      "MoBERT-F": 0.5486597659039927,
      "MoBERT-N": 0.6661029636684128,
      "MoBERT-min(F/N)": 0.5486597659039927,
      "MoBERT-max(F/N)": 0.6661029636684128,
      "MotionCritic": -2.48276424407959,
      "VeMo (human-opt view)": 0.8674275680421423,
      "VeMo (max entropy view)": 0.8270944741532977,
      "VeMo (min entropy view)": 0.8674275680421423,
      "VeMo (random view)": 0.8674275680421423,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward straight while stumbling."
  },
  "006331": {
    "text": "a person is stumbling forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6310779917724793,
      "Minus Multimodal Distance": -6.585293292999268,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.510489684937056e-05,
      "MoBERT-F": 0.46829139943512804,
      "MoBERT-N": 0.4838887185145953,
      "MoBERT-min(F/N)": 0.46829139943512804,
      "MoBERT-max(F/N)": 0.4838887185145953,
      "MotionCritic": -4.592930793762207,
      "VeMo (human-opt view)": 0.935933147632312,
      "VeMo (max entropy view)": 0.8874061718098415,
      "VeMo (min entropy view)": 0.935933147632312,
      "VeMo (random view)": 0.8874061718098415,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is stumbling forward."
  },
  "004734": {
    "text": "a person walks forwards casually.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.32575047745065294,
      "Minus Multimodal Distance": -8.037454605102539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.578772728156764e-05,
      "MoBERT-F": 0.4013521238425455,
      "MoBERT-N": 0.5346836086327809,
      "MoBERT-min(F/N)": 0.4013521238425455,
      "MoBERT-max(F/N)": 0.5346836086327809,
      "MotionCritic": -5.393486499786377,
      "VeMo (human-opt view)": 0.9819494584837545,
      "VeMo (max entropy view)": 0.9362928797924474,
      "VeMo (min entropy view)": 0.9819494584837545,
      "VeMo (random view)": 0.9362928797924474,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward casually."
  },
  "008862": {
    "text": "someone jumps up twice, bringing their hands together in front of them as they go upwards and releasing them as they go downwards.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8160097771158048,
      "Minus Multimodal Distance": -2.814985990524292,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.013679965399205685,
      "MoBERT-F": 0.7066017851889855,
      "MoBERT-N": 0.6727416322686778,
      "MoBERT-min(F/N)": 0.6727416322686778,
      "MoBERT-max(F/N)": 0.7066017851889855,
      "MotionCritic": -6.342186450958252,
      "VeMo (human-opt view)": 0.8874388254486134,
      "VeMo (max entropy view)": 0.59375,
      "VeMo (min entropy view)": 0.8874388254486134,
      "VeMo (random view)": 0.8874388254486134,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone jumps up twice, bringing their hands together in front of them as they go upward and releasing them as they go downward."
  },
  "006426": {
    "text": "person runs in a zigzag motion and ducks under an invisible object hlfway through then returns to full height.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7595693187054995,
      "Minus Multimodal Distance": -3.2279937267303467,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.979310929775238,
      "MoBERT-F": 0.772652328552065,
      "MoBERT-N": 0.6903909236168394,
      "MoBERT-min(F/N)": 0.6903909236168394,
      "MoBERT-max(F/N)": 0.772652328552065,
      "MotionCritic": -7.2108540534973145,
      "VeMo (human-opt view)": 0.7980535279805353,
      "VeMo (max entropy view)": 0.7777777777777778,
      "VeMo (min entropy view)": 0.7980535279805353,
      "VeMo (random view)": 0.7777777777777778,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs in a zigzag motion and ducks under an invisible object halfway through, then returns to full height."
  },
  "001005": {
    "text": "a person touches each elbow to the opposite knee then spreads his legs and starts to do squats.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7983193205746335,
      "Minus Multimodal Distance": -4.714250087738037,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.5154760553268716e-05,
      "MoBERT-F": 0.3563558378424061,
      "MoBERT-N": 0.4498960957254408,
      "MoBERT-min(F/N)": 0.3563558378424061,
      "MoBERT-max(F/N)": 0.4498960957254408,
      "MotionCritic": -6.007505416870117,
      "VeMo (human-opt view)": 0.7878211227402474,
      "VeMo (max entropy view)": 0.7302158273381295,
      "VeMo (min entropy view)": 0.7878211227402474,
      "VeMo (random view)": 0.7878211227402474,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person touches each elbow to the opposite knee, then spreads their legs and starts to do squats."
  },
  "012021": {
    "text": "a person leans forward and uses sweeping motion, sweeping from person's left to right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3759957885858872,
      "Minus Multimodal Distance": -8.80168628692627,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3254220650414936e-05,
      "MoBERT-F": 0.39003442409095207,
      "MoBERT-N": 0.4786589142529807,
      "MoBERT-min(F/N)": 0.39003442409095207,
      "MoBERT-max(F/N)": 0.4786589142529807,
      "MotionCritic": -4.007441997528076,
      "VeMo (human-opt view)": 0.8668866886688669,
      "VeMo (max entropy view)": 0.6215469613259669,
      "VeMo (min entropy view)": 0.8668866886688669,
      "VeMo (random view)": 0.8668866886688669,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person leans forward and uses a sweeping motion, sweeping from the person's left to right."
  },
  "007001": {
    "text": "i person side steps to his right, sliding his feet across the ground.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2948613853224563,
      "Minus Multimodal Distance": -10.951550483703613,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.007652457803487778,
      "MoBERT-F": 0.6012931100318851,
      "MoBERT-N": 0.6451523183220833,
      "MoBERT-min(F/N)": 0.6012931100318851,
      "MoBERT-max(F/N)": 0.6451523183220833,
      "MotionCritic": -4.241452693939209,
      "VeMo (human-opt view)": 0.9741496598639455,
      "VeMo (max entropy view)": 0.9496034791506779,
      "VeMo (min entropy view)": 0.9741496598639455,
      "VeMo (random view)": 0.9741496598639455,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person side - steps to his right, sliding his feet across the ground."
  },
  "008006": {
    "text": "a person does a threatening crouching walk and then raises their arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0598545986795305,
      "Minus Multimodal Distance": -8.111778259277344,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.10352959483861923,
      "MoBERT-F": 0.5919847698622509,
      "MoBERT-N": 0.5576551631094677,
      "MoBERT-min(F/N)": 0.5576551631094677,
      "MoBERT-max(F/N)": 0.5919847698622509,
      "MotionCritic": -5.652400970458984,
      "VeMo (human-opt view)": 0.9755855744206221,
      "VeMo (max entropy view)": 0.9755549311604383,
      "VeMo (min entropy view)": 0.9755855744206221,
      "VeMo (random view)": 0.9755549311604383,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does a threatening crouching walk and then raises their arms."
  },
  "006186": {
    "text": "person makes washing motions with both hands on opposite upper arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6716453359146364,
      "Minus Multimodal Distance": -7.7148542404174805,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.5235243558418006e-05,
      "MoBERT-F": 0.34702068526292507,
      "MoBERT-N": 0.4232879069884786,
      "MoBERT-min(F/N)": 0.34702068526292507,
      "MoBERT-max(F/N)": 0.4232879069884786,
      "MotionCritic": -9.677311897277832,
      "VeMo (human-opt view)": 0.7982456140350878,
      "VeMo (max entropy view)": 0.6501128668171557,
      "VeMo (min entropy view)": 0.7982456140350878,
      "VeMo (random view)": 0.7982456140350878,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person makes washing motions with both hands on the opposite upper arms."
  },
  "011809": {
    "text": "a person is moving around the room while moving his hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7516258033903936,
      "Minus Multimodal Distance": -4.36995267868042,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0021162310149520636,
      "MoBERT-F": 0.5466524738625069,
      "MoBERT-N": 0.3809053742908391,
      "MoBERT-min(F/N)": 0.3809053742908391,
      "MoBERT-max(F/N)": 0.5466524738625069,
      "MotionCritic": -14.276678085327148,
      "VeMo (human-opt view)": 0.9604802401200601,
      "VeMo (max entropy view)": 0.9604802401200601,
      "VeMo (min entropy view)": 0.964964964964965,
      "VeMo (random view)": 0.964964964964965,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving around the room while moving his hands."
  },
  "005031": {
    "text": "a person does a drinking motion with his right arm",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4440403980260608,
      "Minus Multimodal Distance": -6.4426493644714355,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.41632434760686e-05,
      "MoBERT-F": 0.31956530134882266,
      "MoBERT-N": 0.38868152843638837,
      "MoBERT-min(F/N)": 0.31956530134882266,
      "MoBERT-max(F/N)": 0.38868152843638837,
      "MotionCritic": -3.969461441040039,
      "VeMo (human-opt view)": 0.43896103896103894,
      "VeMo (max entropy view)": 0.43896103896103894,
      "VeMo (min entropy view)": 0.10049191848208011,
      "VeMo (random view)": 0.10049191848208011,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person makes a drinking motion with his right arm."
  },
  "009171": {
    "text": "a person sits down to avoid pressure on their knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7806289924607642,
      "Minus Multimodal Distance": -9.827864646911621,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5115290554822423e-05,
      "MoBERT-F": 0.3280951349807889,
      "MoBERT-N": 0.4319239040766663,
      "MoBERT-min(F/N)": 0.3280951349807889,
      "MoBERT-max(F/N)": 0.4319239040766663,
      "MotionCritic": -6.608241081237793,
      "VeMo (human-opt view)": 0.7060998151571165,
      "VeMo (max entropy view)": 0.6224256292906178,
      "VeMo (min entropy view)": 0.7060998151571165,
      "VeMo (random view)": 0.6224256292906178,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down to avoid pressure on their knees."
  },
  "011630": {
    "text": "a person who seems to throw or hit something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6302284265087853,
      "Minus Multimodal Distance": -6.325830936431885,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00797821395099163,
      "MoBERT-F": 0.5527954453038654,
      "MoBERT-N": 0.6158257505138826,
      "MoBERT-min(F/N)": 0.5527954453038654,
      "MoBERT-max(F/N)": 0.6158257505138826,
      "MotionCritic": -5.719411373138428,
      "VeMo (human-opt view)": 0.9819853397937632,
      "VeMo (max entropy view)": 0.975609756097561,
      "VeMo (min entropy view)": 0.9819853397937632,
      "VeMo (random view)": 0.9819853397937632,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who seems to throw or hit something."
  },
  "005729": {
    "text": "a person sways to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.42425639745433996,
      "Minus Multimodal Distance": -6.5834455490112305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.1400057196151465e-05,
      "MoBERT-F": 0.34179914814066686,
      "MoBERT-N": 0.4384077377848278,
      "MoBERT-min(F/N)": 0.34179914814066686,
      "MoBERT-max(F/N)": 0.4384077377848278,
      "MotionCritic": -4.458872318267822,
      "VeMo (human-opt view)": 0.8076923076923077,
      "VeMo (max entropy view)": 0.7979139504563233,
      "VeMo (min entropy view)": 0.8076923076923077,
      "VeMo (random view)": 0.8076923076923077,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sways to the right."
  },
  "009373": {
    "text": "a figure appears to climb stairs",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2709645768871374,
      "Minus Multimodal Distance": -7.628599643707275,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.301203469163738e-05,
      "MoBERT-F": 0.5148680651097772,
      "MoBERT-N": 0.5540100468545316,
      "MoBERT-min(F/N)": 0.5148680651097772,
      "MoBERT-max(F/N)": 0.5540100468545316,
      "MotionCritic": -6.827981472015381,
      "VeMo (human-opt view)": 0.0035837189984573394,
      "VeMo (max entropy view)": 0.0035837189984573394,
      "VeMo (min entropy view)": 0.0031815375848805736,
      "VeMo (random view)": 0.0031815375848805736,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure appears to be climbing stairs."
  },
  "005790": {
    "text": "a person dances with someone.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7044432067727249,
      "Minus Multimodal Distance": -10.13183879852295,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.30539482831954956,
      "MoBERT-F": 0.7257152303322554,
      "MoBERT-N": 0.6600532091717821,
      "MoBERT-min(F/N)": 0.6600532091717821,
      "MoBERT-max(F/N)": 0.7257152303322554,
      "MotionCritic": -7.324164867401123,
      "VeMo (human-opt view)": 0.14072059823249491,
      "VeMo (max entropy view)": 0.18237454100367198,
      "VeMo (min entropy view)": 0.14072059823249491,
      "VeMo (random view)": 0.14072059823249491,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person dances with someone."
  },
  "004424": {
    "text": "someone steps back with their right foot and then sits down while placing his hands on his knees with elbows out.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.637609185694391,
      "Minus Multimodal Distance": -10.058680534362793,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9934409260749817,
      "MoBERT-F": 0.5544361570519574,
      "MoBERT-N": 0.49994474906633996,
      "MoBERT-min(F/N)": 0.49994474906633996,
      "MoBERT-max(F/N)": 0.5544361570519574,
      "MotionCritic": -13.36335277557373,
      "VeMo (human-opt view)": 0.8514412416851441,
      "VeMo (max entropy view)": 0.7878172588832487,
      "VeMo (min entropy view)": 0.8514412416851441,
      "VeMo (random view)": 0.7878172588832487,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone steps back with their right foot, then sits down while placing their hands on their knees, with elbows out."
  },
  "007585": {
    "text": "a person who is standing with his hands by his sides takes two steps forward, angles to his left as he continues walking and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.31088743699346166,
      "Minus Multimodal Distance": -4.188388347625732,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2592061213799752e-05,
      "MoBERT-F": 0.3427703647090823,
      "MoBERT-N": 0.5181681458193463,
      "MoBERT-min(F/N)": 0.3427703647090823,
      "MoBERT-max(F/N)": 0.5181681458193463,
      "MotionCritic": -10.95527458190918,
      "VeMo (human-opt view)": 0.984009840098401,
      "VeMo (max entropy view)": 0.9755584756898817,
      "VeMo (min entropy view)": 0.984009840098401,
      "VeMo (random view)": 0.984009840098401,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who is standing with their hands by their sides takes two steps forward, angles to their left as they continue walking, and then stops."
  },
  "002656": {
    "text": "a person pers someing with right hand then scratches something with left have and then steps backwards",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5522971229503371,
      "Minus Multimodal Distance": -4.839555263519287,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.07934501767158508,
      "MoBERT-F": 0.5016059816852136,
      "MoBERT-N": 0.4659636397666871,
      "MoBERT-min(F/N)": 0.4659636397666871,
      "MoBERT-max(F/N)": 0.5016059816852136,
      "MotionCritic": -7.519694805145264,
      "VeMo (human-opt view)": 0.6788990825688074,
      "VeMo (max entropy view)": 0.6788990825688074,
      "VeMo (min entropy view)": 0.7306122448979592,
      "VeMo (random view)": 0.6788990825688074,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does something with the right hand, then scratches something with the left hand, and then steps backwards."
  },
  "000565": {
    "text": "an off balance intoxicated man gestures at another person to the left. seemingly in an argument.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7620397388618504,
      "Minus Multimodal Distance": -9.420817375183105,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.445819518470671e-05,
      "MoBERT-F": 0.4128710053784198,
      "MoBERT-N": 0.44616580017550267,
      "MoBERT-min(F/N)": 0.4128710053784198,
      "MoBERT-max(F/N)": 0.44616580017550267,
      "MotionCritic": -7.6538310050964355,
      "VeMo (human-opt view)": 8.77972525884129e-05,
      "VeMo (max entropy view)": 0.0013232310361389098,
      "VeMo (min entropy view)": 8.77972525884129e-05,
      "VeMo (random view)": 8.77972525884129e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "An off - balance, intoxicated man gestures at another person to his left, seemingly in an argument."
  },
  "008843": {
    "text": "a person lifts and spins around their right leg then lifts and spins around their left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9389026124612418,
      "Minus Multimodal Distance": -9.867859840393066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0029021515510976315,
      "MoBERT-F": 0.551024529148808,
      "MoBERT-N": 0.5240965863346819,
      "MoBERT-min(F/N)": 0.5240965863346819,
      "MoBERT-max(F/N)": 0.551024529148808,
      "MotionCritic": -9.188182830810547,
      "VeMo (human-opt view)": 0.7770582793709528,
      "VeMo (max entropy view)": 0.7770582793709528,
      "VeMo (min entropy view)": 0.7773722627737226,
      "VeMo (random view)": 0.7770582793709528,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts and spins around their right leg, then lifts and spins around their left leg."
  },
  "000274": {
    "text": "a man walks forward and raises both his arms and then drop his arms .",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.530037295180903,
      "Minus Multimodal Distance": -6.11810827255249,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.948340654373169,
      "MoBERT-F": 0.7486155342365858,
      "MoBERT-N": 0.7838669254573641,
      "MoBERT-min(F/N)": 0.7486155342365858,
      "MoBERT-max(F/N)": 0.7838669254573641,
      "MotionCritic": -7.258154392242432,
      "VeMo (human-opt view)": 0.9951097178683386,
      "VeMo (max entropy view)": 0.9951097178683386,
      "VeMo (min entropy view)": 0.9968184624151194,
      "VeMo (random view)": 0.9968184624151194,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward, raises both his arms, and then drops his arms."
  },
  "014535": {
    "text": "a person appears to have severe arm pain holding and slouching their right shoulder.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.42700015995717017,
      "Minus Multimodal Distance": -9.727538108825684,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.275451308581978e-05,
      "MoBERT-F": 0.30979327226754705,
      "MoBERT-N": 0.417969897332496,
      "MoBERT-min(F/N)": 0.30979327226754705,
      "MoBERT-max(F/N)": 0.417969897332496,
      "MotionCritic": -1.2779291868209839,
      "VeMo (human-opt view)": 0.5928753180661578,
      "VeMo (max entropy view)": 0.5928753180661578,
      "VeMo (min entropy view)": 0.3073394495412844,
      "VeMo (random view)": 0.3073394495412844,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person appears to have severe arm pain, holding and slouching their right shoulder."
  },
  "002357": {
    "text": "a headless line figure takes four steps forward, down a ramp, toward the viewer.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.33711727166080935,
      "Minus Multimodal Distance": -5.973976135253906,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.1503321224590763e-05,
      "MoBERT-F": 0.3106619840550229,
      "MoBERT-N": 0.45190631009559035,
      "MoBERT-min(F/N)": 0.3106619840550229,
      "MoBERT-max(F/N)": 0.45190631009559035,
      "MotionCritic": -2.700190782546997,
      "VeMo (human-opt view)": 0.3347457627118644,
      "VeMo (max entropy view)": 0.5615942028985508,
      "VeMo (min entropy view)": 0.3347457627118644,
      "VeMo (random view)": 0.3347457627118644,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure takes four steps forward, down a ramp, toward the viewer."
  },
  "006095": {
    "text": "person seems to be exercising by bending at the knees and walking forward",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.910417091132284,
      "Minus Multimodal Distance": -7.001521587371826,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9894232153892517,
      "MoBERT-F": 0.6519354657213942,
      "MoBERT-N": 0.554865516405983,
      "MoBERT-min(F/N)": 0.554865516405983,
      "MoBERT-max(F/N)": 0.6519354657213942,
      "MotionCritic": -5.297787189483643,
      "VeMo (human-opt view)": 0.9850317124735729,
      "VeMo (max entropy view)": 0.9770385999751768,
      "VeMo (min entropy view)": 0.9850317124735729,
      "VeMo (random view)": 0.9850317124735729,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person seems to be exercising by bending at the knees and walking forward."
  },
  "008170": {
    "text": "the person is standing on his left foot bending his right knee.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3580655505871148,
      "Minus Multimodal Distance": -7.405013084411621,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.18973179673776e-05,
      "MoBERT-F": 0.3368001990803568,
      "MoBERT-N": 0.37958562277307073,
      "MoBERT-min(F/N)": 0.3368001990803568,
      "MoBERT-max(F/N)": 0.37958562277307073,
      "MotionCritic": -15.027549743652344,
      "VeMo (human-opt view)": 0.6519337016574586,
      "VeMo (max entropy view)": 0.6519337016574586,
      "VeMo (min entropy view)": 0.9149828440716736,
      "VeMo (random view)": 0.9149828440716736,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing on his left foot, bending his right knee."
  },
  "013023": {
    "text": "person moves i a anticlockwise direction in a circle by sprinting",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4369815499810978,
      "Minus Multimodal Distance": -5.545800685882568,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0013980703661218286,
      "MoBERT-F": 0.605364409939112,
      "MoBERT-N": 0.6841791187206812,
      "MoBERT-min(F/N)": 0.605364409939112,
      "MoBERT-max(F/N)": 0.6841791187206812,
      "MotionCritic": -12.811697959899902,
      "VeMo (human-opt view)": 0.13307984790874525,
      "VeMo (max entropy view)": 0.13307984790874525,
      "VeMo (min entropy view)": 0.03504993146661445,
      "VeMo (random view)": 0.13307984790874525,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sprints in a circle in an anticlockwise direction."
  },
  "007862": {
    "text": "a person crosses their arms, then brings their arms back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.243970587075479,
      "Minus Multimodal Distance": -4.886783599853516,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.829191311728209e-05,
      "MoBERT-F": 0.29885519933360305,
      "MoBERT-N": 0.44531460673444145,
      "MoBERT-min(F/N)": 0.29885519933360305,
      "MoBERT-max(F/N)": 0.44531460673444145,
      "MotionCritic": -2.8223750591278076,
      "VeMo (human-opt view)": 0.9994474581056283,
      "VeMo (max entropy view)": 0.9284253578732107,
      "VeMo (min entropy view)": 0.9994474581056283,
      "VeMo (random view)": 0.9284253578732107,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person crosses their arms, then brings their arms back down."
  },
  "012877": {
    "text": "a person start to dance with legs",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8503917779021706,
      "Minus Multimodal Distance": -6.0766496658325195,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5105629563331604,
      "MoBERT-F": 0.7058098204790859,
      "MoBERT-N": 0.7803745565134176,
      "MoBERT-min(F/N)": 0.7058098204790859,
      "MoBERT-max(F/N)": 0.7803745565134176,
      "MotionCritic": -4.057027339935303,
      "VeMo (human-opt view)": 0.8269525267993875,
      "VeMo (max entropy view)": 0.8269525267993875,
      "VeMo (min entropy view)": 0.8440090429540317,
      "VeMo (random view)": 0.8440090429540317,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person starts to dance with their legs."
  },
  "011162": {
    "text": "the person is painting a wall.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3257960549248191,
      "Minus Multimodal Distance": -8.204632759094238,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.326085324166343e-05,
      "MoBERT-F": 0.4611470656670899,
      "MoBERT-N": 0.5539245793828863,
      "MoBERT-min(F/N)": 0.4611470656670899,
      "MoBERT-max(F/N)": 0.5539245793828863,
      "MotionCritic": -7.568181991577148,
      "VeMo (human-opt view)": 5.078138088278304e-06,
      "VeMo (max entropy view)": 1.6607929954394624e-05,
      "VeMo (min entropy view)": 5.078138088278304e-06,
      "VeMo (random view)": 5.078138088278304e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is painting a wall."
  },
  "004110": {
    "text": "a person jumping over a puddle",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3695069719086315,
      "Minus Multimodal Distance": -4.923709392547607,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.41646063327789307,
      "MoBERT-F": 0.7046223917594214,
      "MoBERT-N": 0.6331568549040187,
      "MoBERT-min(F/N)": 0.6331568549040187,
      "MoBERT-max(F/N)": 0.7046223917594214,
      "MotionCritic": -10.525193214416504,
      "VeMo (human-opt view)": 0.00043136532187346214,
      "VeMo (max entropy view)": 0.00043136532187346214,
      "VeMo (min entropy view)": 1.184549855996884e-05,
      "VeMo (random view)": 1.184549855996884e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is jumping over a puddle."
  },
  "006306": {
    "text": "a person pushes their right arm forward and then uses both hands together while bending over as if catching something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6174163370834226,
      "Minus Multimodal Distance": -7.240248203277588,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4037120965658687e-05,
      "MoBERT-F": 0.38576401207365874,
      "MoBERT-N": 0.404006198050909,
      "MoBERT-min(F/N)": 0.38576401207365874,
      "MoBERT-max(F/N)": 0.404006198050909,
      "MotionCritic": -7.629611492156982,
      "VeMo (human-opt view)": 0.693069306930693,
      "VeMo (max entropy view)": 0.693069306930693,
      "VeMo (min entropy view)": 0.8809849521203831,
      "VeMo (random view)": 0.693069306930693,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person extends their right arm forward, then, while bending over, uses both hands simultaneously as if catching something."
  },
  "012735": {
    "text": "standing person raises right leg and both arms, turns slightly to the left, then returns to original standing position.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4912348496238516,
      "Minus Multimodal Distance": -4.147408485412598,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2218155208975077e-05,
      "MoBERT-F": 0.30479208660730617,
      "MoBERT-N": 0.47707315446254245,
      "MoBERT-min(F/N)": 0.30479208660730617,
      "MoBERT-max(F/N)": 0.47707315446254245,
      "MotionCritic": -7.811404705047607,
      "VeMo (human-opt view)": 0.008590339388818476,
      "VeMo (max entropy view)": 0.02284932083815942,
      "VeMo (min entropy view)": 0.008590339388818476,
      "VeMo (random view)": 0.02284932083815942,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A standing person raises their right leg and both arms, turns slightly to the left, then returns to the original standing position."
  },
  "013855": {
    "text": "the toon has their both arms up & extending in a \"dancing\" position, while moving forwards then backwards.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.612144144511052,
      "Minus Multimodal Distance": -7.239539623260498,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.2180698961019516,
      "MoBERT-F": 0.6580860696565893,
      "MoBERT-N": 0.5731540883259273,
      "MoBERT-min(F/N)": 0.5731540883259273,
      "MoBERT-max(F/N)": 0.6580860696565893,
      "MotionCritic": -3.088995933532715,
      "VeMo (human-opt view)": 0.9324618736383442,
      "VeMo (max entropy view)": 0.928416485900217,
      "VeMo (min entropy view)": 0.9324618736383442,
      "VeMo (random view)": 0.9324618736383442,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The toon has both their arms held up and extended in a \"dancing\" position, while moving forward then backward."
  },
  "000189": {
    "text": "a figure walking in a straight line, swinging their left arm far greater than the right arm.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5121884128743054,
      "Minus Multimodal Distance": -4.228654384613037,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8811084121116437e-05,
      "MoBERT-F": 0.33770561260173854,
      "MoBERT-N": 0.3892962760892672,
      "MoBERT-min(F/N)": 0.33770561260173854,
      "MoBERT-max(F/N)": 0.3892962760892672,
      "MotionCritic": -6.9269561767578125,
      "VeMo (human-opt view)": 0.6073170731707317,
      "VeMo (max entropy view)": 0.6073170731707317,
      "VeMo (min entropy view)": 0.6651376146788991,
      "VeMo (random view)": 0.6651376146788991,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure is walking in a straight line, swinging their left arm far more than their right arm."
  },
  "005955": {
    "text": "a person puts their hands together, leans forwards slightly then swings the arms from right to left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6497316070217772,
      "Minus Multimodal Distance": -5.544854640960693,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.6360019041458145e-05,
      "MoBERT-F": 0.33998822063507306,
      "MoBERT-N": 0.4340565397070026,
      "MoBERT-min(F/N)": 0.33998822063507306,
      "MoBERT-max(F/N)": 0.4340565397070026,
      "MotionCritic": -7.343395233154297,
      "VeMo (human-opt view)": 0.9196729196729196,
      "VeMo (max entropy view)": 0.8265835929387332,
      "VeMo (min entropy view)": 0.9196729196729196,
      "VeMo (random view)": 0.8265835929387332,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person puts their hands together, leans forward slightly, then swings the arms from right to left."
  },
  "003613": {
    "text": "a man picks something up with his left hand and mixes it and then steps back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5529888909228845,
      "Minus Multimodal Distance": -11.722952842712402,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.183801451697946e-05,
      "MoBERT-F": 0.34263644141224325,
      "MoBERT-N": 0.442939977767817,
      "MoBERT-min(F/N)": 0.34263644141224325,
      "MoBERT-max(F/N)": 0.442939977767817,
      "MotionCritic": 2.421691656112671,
      "VeMo (human-opt view)": 0.6076388888888888,
      "VeMo (max entropy view)": 0.6076388888888888,
      "VeMo (min entropy view)": 0.08996897621509824,
      "VeMo (random view)": 0.08996897621509824,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man picks something up with his left hand, mixes it, and then steps back."
  },
  "008002": {
    "text": "a man is standing still and then starts walking forward before turning around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6329610224140968,
      "Minus Multimodal Distance": -9.868782997131348,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5638782972237095e-05,
      "MoBERT-F": 0.386458026699082,
      "MoBERT-N": 0.4459171046959277,
      "MoBERT-min(F/N)": 0.386458026699082,
      "MoBERT-max(F/N)": 0.4459171046959277,
      "MotionCritic": -12.881437301635742,
      "VeMo (human-opt view)": 0.9919465126880413,
      "VeMo (max entropy view)": 0.9627740948495666,
      "VeMo (min entropy view)": 0.9919465126880413,
      "VeMo (random view)": 0.9919465126880413,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is standing still, and then he starts walking forward before turning around."
  },
  "001429": {
    "text": "a man bends his right arm at the elbow in a struggling motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.485366959677748,
      "Minus Multimodal Distance": -8.27163028717041,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9441107471939176e-05,
      "MoBERT-F": 0.31734142307621227,
      "MoBERT-N": 0.37013383418441914,
      "MoBERT-min(F/N)": 0.31734142307621227,
      "MoBERT-max(F/N)": 0.37013383418441914,
      "MotionCritic": -5.214310169219971,
      "VeMo (human-opt view)": 0.9242113887750921,
      "VeMo (max entropy view)": 0.7660910518053375,
      "VeMo (min entropy view)": 0.9242113887750921,
      "VeMo (random view)": 0.7660910518053375,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man bends his right arm at the elbow in a struggling motion."
  },
  "013003": {
    "text": "a person, while running quickly, bends down and picks something up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5095445318043683,
      "Minus Multimodal Distance": -10.658273696899414,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9919730424880981,
      "MoBERT-F": 0.7240440342984372,
      "MoBERT-N": 0.7136257452067284,
      "MoBERT-min(F/N)": 0.7136257452067284,
      "MoBERT-max(F/N)": 0.7240440342984372,
      "MotionCritic": -6.888584136962891,
      "VeMo (human-opt view)": 0.8181818181818182,
      "VeMo (max entropy view)": 0.7775768535262206,
      "VeMo (min entropy view)": 0.8181818181818182,
      "VeMo (random view)": 0.7775768535262206,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person, while running quickly, bends down and picks something up."
  },
  "014285": {
    "text": "the person was walking so he could sneak up on someone.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7132444228941264,
      "Minus Multimodal Distance": -6.776355743408203,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4028824554989114e-05,
      "MoBERT-F": 0.3761953676555916,
      "MoBERT-N": 0.5601748168756484,
      "MoBERT-min(F/N)": 0.3761953676555916,
      "MoBERT-max(F/N)": 0.5601748168756484,
      "MotionCritic": -4.759599685668945,
      "VeMo (human-opt view)": 0.6073619631901841,
      "VeMo (max entropy view)": 0.6073619631901841,
      "VeMo (min entropy view)": 0.651872399445215,
      "VeMo (random view)": 0.6073619631901841,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was walking so that he could sneak up on someone."
  },
  "002242": {
    "text": "a person crawling from right to left and vice versa",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1917847087213163,
      "Minus Multimodal Distance": -3.2194020748138428,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9768859148025513,
      "MoBERT-F": 0.7546939575751856,
      "MoBERT-N": 0.5579973735344982,
      "MoBERT-min(F/N)": 0.5579973735344982,
      "MoBERT-max(F/N)": 0.7546939575751856,
      "MotionCritic": -7.490814208984375,
      "VeMo (human-opt view)": 0.7661290322580645,
      "VeMo (max entropy view)": 0.7549407114624506,
      "VeMo (min entropy view)": 0.7661290322580645,
      "VeMo (random view)": 0.7549407114624506,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is crawling from right to left and vice versa."
  },
  "009488": {
    "text": "a person walks towards the left making a wide 's' shape.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9339829647195238,
      "Minus Multimodal Distance": -9.841514587402344,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.563702582847327e-05,
      "MoBERT-F": 0.3999064063245986,
      "MoBERT-N": 0.4638522542463778,
      "MoBERT-min(F/N)": 0.3999064063245986,
      "MoBERT-max(F/N)": 0.4638522542463778,
      "MotionCritic": -12.755931854248047,
      "VeMo (human-opt view)": 0.7881481481481482,
      "VeMo (max entropy view)": 0.6222222222222222,
      "VeMo (min entropy view)": 0.7881481481481482,
      "VeMo (random view)": 0.6222222222222222,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks towards the left, making a wide 'S' shape."
  },
  "004996": {
    "text": "the person is sat down and their arms are shaking",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.49967110820122496,
      "Minus Multimodal Distance": -10.811132431030273,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7470487111713737e-05,
      "MoBERT-F": 0.29190694383624716,
      "MoBERT-N": 0.40255394177981585,
      "MoBERT-min(F/N)": 0.29190694383624716,
      "MoBERT-max(F/N)": 0.40255394177981585,
      "MotionCritic": -8.39529037475586,
      "VeMo (human-opt view)": 0.23421588594704684,
      "VeMo (max entropy view)": 0.6666666666666666,
      "VeMo (min entropy view)": 0.23421588594704684,
      "VeMo (random view)": 0.23421588594704684,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is sitting down and their arms are shaking."
  },
  "012981": {
    "text": "a person waves their arms over their heads.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.32791406393424,
      "Minus Multimodal Distance": -13.455573081970215,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00015947582141961902,
      "MoBERT-F": 0.5026130348288033,
      "MoBERT-N": 0.565144940038431,
      "MoBERT-min(F/N)": 0.5026130348288033,
      "MoBERT-max(F/N)": 0.565144940038431,
      "MotionCritic": -8.184040069580078,
      "VeMo (human-opt view)": 0.9966235539401096,
      "VeMo (max entropy view)": 0.9959298429148209,
      "VeMo (min entropy view)": 0.9966235539401096,
      "VeMo (random view)": 0.9959298429148209,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person waves their arms over their head."
  },
  "003964": {
    "text": "a person opens and drinks from a container.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.648928849398144,
      "Minus Multimodal Distance": -4.6410746574401855,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8187558200443164e-05,
      "MoBERT-F": 0.3476764672786696,
      "MoBERT-N": 0.40805132145824036,
      "MoBERT-min(F/N)": 0.3476764672786696,
      "MoBERT-max(F/N)": 0.40805132145824036,
      "MotionCritic": -4.319687366485596,
      "VeMo (human-opt view)": 1.5258556235409006e-05,
      "VeMo (max entropy view)": 2.013619320639861e-05,
      "VeMo (min entropy view)": 1.5258556235409006e-05,
      "VeMo (random view)": 2.013619320639861e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person opens a container and drinks from it."
  },
  "002928": {
    "text": "a person stumbling to their right side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2582083385432792,
      "Minus Multimodal Distance": -5.260801792144775,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5586976335034706e-05,
      "MoBERT-F": 0.3552933339819755,
      "MoBERT-N": 0.45418707078433973,
      "MoBERT-min(F/N)": 0.3552933339819755,
      "MoBERT-max(F/N)": 0.45418707078433973,
      "MotionCritic": -4.721866607666016,
      "VeMo (human-opt view)": 0.9578917215697508,
      "VeMo (max entropy view)": 0.9497020262216925,
      "VeMo (min entropy view)": 0.9578917215697508,
      "VeMo (random view)": 0.9497020262216925,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is stumbling to their right side."
  },
  "007451": {
    "text": "raising arm towards chest while standing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.26283053846696075,
      "Minus Multimodal Distance": -2.4741079807281494,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.35307262907736e-05,
      "MoBERT-F": 0.3135489833029322,
      "MoBERT-N": 0.3926366925659301,
      "MoBERT-min(F/N)": 0.3135489833029322,
      "MoBERT-max(F/N)": 0.3926366925659301,
      "MotionCritic": -4.171450138092041,
      "VeMo (human-opt view)": 0.9959298429148209,
      "VeMo (max entropy view)": 0.8267148014440433,
      "VeMo (min entropy view)": 0.9959298429148209,
      "VeMo (random view)": 0.9959298429148209,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises an arm towards the chest while standing."
  },
  "002795": {
    "text": "a person jogging in place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.39535319307076167,
      "Minus Multimodal Distance": -7.961055755615234,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0023098124656826258,
      "MoBERT-F": 0.675021369437651,
      "MoBERT-N": 0.6368335645698339,
      "MoBERT-min(F/N)": 0.6368335645698339,
      "MoBERT-max(F/N)": 0.675021369437651,
      "MotionCritic": -7.7189860343933105,
      "VeMo (human-opt view)": 0.9434467971053337,
      "VeMo (max entropy view)": 0.4222222222222222,
      "VeMo (min entropy view)": 0.9434467971053337,
      "VeMo (random view)": 0.9434467971053337,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is jogging in place."
  },
  "001380": {
    "text": "a person bent slightly over and picked something up with right hand and turned to right and  shaking item, looks as if is cooking and adding items to a pot of water.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8022691898583099,
      "Minus Multimodal Distance": -5.2527923583984375,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.427862818876747e-05,
      "MoBERT-F": 0.3526002645392192,
      "MoBERT-N": 0.42504844518200546,
      "MoBERT-min(F/N)": 0.3526002645392192,
      "MoBERT-max(F/N)": 0.42504844518200546,
      "MotionCritic": -7.562880992889404,
      "VeMo (human-opt view)": 0.02594578924481809,
      "VeMo (max entropy view)": 0.027624309392265192,
      "VeMo (min entropy view)": 0.02594578924481809,
      "VeMo (random view)": 0.027624309392265192,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person bent slightly over, picked something up with their right hand, turned to the right, and shook the item. It looks as if they are cooking and adding items to a pot of water."
  },
  "003703": {
    "text": "a person runs forward and jumps over something, then turns around and jumps back over it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3298008788349345,
      "Minus Multimodal Distance": -5.814730167388916,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9959911704063416,
      "MoBERT-F": 0.7598628420293916,
      "MoBERT-N": 0.8510625449391827,
      "MoBERT-min(F/N)": 0.7598628420293916,
      "MoBERT-max(F/N)": 0.8510625449391827,
      "MotionCritic": -7.369813442230225,
      "VeMo (human-opt view)": 0.1330909090909091,
      "VeMo (max entropy view)": 0.2808988764044944,
      "VeMo (min entropy view)": 0.1330909090909091,
      "VeMo (random view)": 0.1330909090909091,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs forward and jumps over something, then turns around and jumps back over it."
  },
  "009749": {
    "text": "the person is doing something at the counter and moving it to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3392105963552714,
      "Minus Multimodal Distance": -5.825275897979736,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.733930836664513e-05,
      "MoBERT-F": 0.45914098453850677,
      "MoBERT-N": 0.400477132500934,
      "MoBERT-min(F/N)": 0.400477132500934,
      "MoBERT-max(F/N)": 0.45914098453850677,
      "MotionCritic": -4.957935333251953,
      "VeMo (human-opt view)": 8.515808178582174e-05,
      "VeMo (max entropy view)": 0.0004880429477794046,
      "VeMo (min entropy view)": 8.515808178582174e-05,
      "VeMo (random view)": 8.515808178582174e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is doing something at the counter and moving it to the right."
  },
  "013289": {
    "text": "a person walks counterclockwise in a large curve while swinging their arms.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7884252919304986,
      "Minus Multimodal Distance": -11.051486015319824,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9290633797645569,
      "MoBERT-F": 0.7001109394164173,
      "MoBERT-N": 0.7443129987826946,
      "MoBERT-min(F/N)": 0.7001109394164173,
      "MoBERT-max(F/N)": 0.7443129987826946,
      "MotionCritic": -13.329718589782715,
      "VeMo (human-opt view)": 0.7184466019417476,
      "VeMo (max entropy view)": 0.7053140096618358,
      "VeMo (min entropy view)": 0.7184466019417476,
      "VeMo (random view)": 0.7184466019417476,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks counterclockwise in a large curve while swinging their arms."
  },
  "005050": {
    "text": "a person climbs up something, turns around and climbs back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6791539321084267,
      "Minus Multimodal Distance": -4.656869411468506,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9510331749916077,
      "MoBERT-F": 0.798218480747616,
      "MoBERT-N": 0.7591532638597036,
      "MoBERT-min(F/N)": 0.7591532638597036,
      "MoBERT-max(F/N)": 0.798218480747616,
      "MotionCritic": -11.839614868164062,
      "VeMo (human-opt view)": 0.004319617257817474,
      "VeMo (max entropy view)": 0.004319617257817474,
      "VeMo (min entropy view)": 0.0003574865942527155,
      "VeMo (random view)": 0.004319617257817474,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person climbs up something, turns around, and climbs back down."
  },
  "005981": {
    "text": "the person is doing arm exercises like a windmill.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6468259888313092,
      "Minus Multimodal Distance": -10.575932502746582,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.6046457886695862,
      "MoBERT-F": 0.7395070483071577,
      "MoBERT-N": 0.558283884641609,
      "MoBERT-min(F/N)": 0.558283884641609,
      "MoBERT-max(F/N)": 0.7395070483071577,
      "MotionCritic": -3.8640122413635254,
      "VeMo (human-opt view)": 0.9495760821062026,
      "VeMo (max entropy view)": 0.9495760821062026,
      "VeMo (min entropy view)": 0.9554285714285714,
      "VeMo (random view)": 0.9554285714285714,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is doing arm exercises like a windmill."
  },
  "009363": {
    "text": "someone is pulling back, while standing on one foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8695668526854539,
      "Minus Multimodal Distance": -6.571593284606934,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00532224727794528,
      "MoBERT-F": 0.6487954386217827,
      "MoBERT-N": 0.6615336139380592,
      "MoBERT-min(F/N)": 0.6487954386217827,
      "MoBERT-max(F/N)": 0.6615336139380592,
      "MotionCritic": -5.213561058044434,
      "VeMo (human-opt view)": 0.562111801242236,
      "VeMo (max entropy view)": 0.562111801242236,
      "VeMo (min entropy view)": 0.6238095238095238,
      "VeMo (random view)": 0.6238095238095238,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone is pulling back while standing on one foot."
  },
  "006185": {
    "text": "the person is running around in a circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9451639025391365,
      "Minus Multimodal Distance": -6.387064456939697,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.08621836453676224,
      "MoBERT-F": 0.7338973433236837,
      "MoBERT-N": 0.7505059660467358,
      "MoBERT-min(F/N)": 0.7338973433236837,
      "MoBERT-max(F/N)": 0.7505059660467358,
      "MotionCritic": -12.483928680419922,
      "VeMo (human-opt view)": 0.7310126582278481,
      "VeMo (max entropy view)": 0.7310126582278481,
      "VeMo (min entropy view)": 0.7310549777117384,
      "VeMo (random view)": 0.7310126582278481,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is running around in a circle."
  },
  "004977": {
    "text": "a person is walking like a mummy.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6702364656357234,
      "Minus Multimodal Distance": -9.29016399383545,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7209455966949463,
      "MoBERT-F": 0.7557545742480276,
      "MoBERT-N": 0.6188139401151176,
      "MoBERT-min(F/N)": 0.6188139401151176,
      "MoBERT-max(F/N)": 0.7557545742480276,
      "MotionCritic": -3.7535014152526855,
      "VeMo (human-opt view)": 0.6214953271028038,
      "VeMo (max entropy view)": 0.6214953271028038,
      "VeMo (min entropy view)": 0.6932849364791288,
      "VeMo (random view)": 0.6932849364791288,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking like a mummy."
  },
  "008205": {
    "text": "both the hand holding the right leg.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5084835790914279,
      "Minus Multimodal Distance": -6.449541091918945,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0001799344754545018,
      "MoBERT-F": 0.2677316706671016,
      "MoBERT-N": 0.26616231607108226,
      "MoBERT-min(F/N)": 0.26616231607108226,
      "MoBERT-max(F/N)": 0.2677316706671016,
      "MotionCritic": -8.943656921386719,
      "VeMo (human-opt view)": 0.7549407114624506,
      "VeMo (max entropy view)": 0.7422680412371134,
      "VeMo (min entropy view)": 0.7549407114624506,
      "VeMo (random view)": 0.7549407114624506,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is holding the right leg with both hands."
  },
  "006022": {
    "text": "someone runs backwards in a counterclockwise motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7318132944174485,
      "Minus Multimodal Distance": -7.773547172546387,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8634359836578369,
      "MoBERT-F": 0.5965422969756422,
      "MoBERT-N": 0.6280164357930212,
      "MoBERT-min(F/N)": 0.5965422969756422,
      "MoBERT-max(F/N)": 0.6280164357930212,
      "MotionCritic": -8.986971855163574,
      "VeMo (human-opt view)": 0.6512261580381471,
      "VeMo (max entropy view)": 0.6512261580381471,
      "VeMo (min entropy view)": 0.7549407114624506,
      "VeMo (random view)": 0.7549407114624506,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone runs backward in a counterclockwise motion."
  },
  "014612": {
    "text": "a person rolls their arms and shoulders.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5682810180010814,
      "Minus Multimodal Distance": -7.785942077636719,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.33246174454689026,
      "MoBERT-F": 0.6902137854886873,
      "MoBERT-N": 0.6391698532340118,
      "MoBERT-min(F/N)": 0.6391698532340118,
      "MoBERT-max(F/N)": 0.6902137854886873,
      "MotionCritic": -8.899015426635742,
      "VeMo (human-opt view)": 0.9047013977128335,
      "VeMo (max entropy view)": 0.9047013977128335,
      "VeMo (min entropy view)": 0.9498164014687882,
      "VeMo (random view)": 0.9498164014687882,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person rolls their arms and shoulders."
  },
  "011493": {
    "text": "a person swam in free style",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8709179895277427,
      "Minus Multimodal Distance": -8.341782569885254,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.370684342167806e-05,
      "MoBERT-F": 0.37665241179900144,
      "MoBERT-N": 0.42594409825136187,
      "MoBERT-min(F/N)": 0.37665241179900144,
      "MoBERT-max(F/N)": 0.42594409825136187,
      "MotionCritic": -11.988489151000977,
      "VeMo (human-opt view)": 0.28180737217598095,
      "VeMo (max entropy view)": 0.28180737217598095,
      "VeMo (min entropy view)": 0.1738562091503268,
      "VeMo (random view)": 0.28180737217598095,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person swam in freestyle."
  },
  "003575": {
    "text": "a person standing straight ,holding hands .",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.46625713419572096,
      "Minus Multimodal Distance": -5.410864353179932,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.809730176522862e-05,
      "MoBERT-F": 0.4154968297162632,
      "MoBERT-N": 0.492421281645413,
      "MoBERT-min(F/N)": 0.4154968297162632,
      "MoBERT-max(F/N)": 0.492421281645413,
      "MotionCritic": -7.944600582122803,
      "VeMo (human-opt view)": 0.9707214005433142,
      "VeMo (max entropy view)": 0.766295707472178,
      "VeMo (min entropy view)": 0.9707214005433142,
      "VeMo (random view)": 0.9707214005433142,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing straight, holding hands."
  },
  "001775": {
    "text": "a person dribbles a ball with one hand then the other and proceeds to shoot ball into goal with both hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7852280642723297,
      "Minus Multimodal Distance": -4.620584011077881,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7842885255813599,
      "MoBERT-F": 0.660898877870439,
      "MoBERT-N": 0.6030522176567208,
      "MoBERT-min(F/N)": 0.6030522176567208,
      "MoBERT-max(F/N)": 0.660898877870439,
      "MotionCritic": -6.309328556060791,
      "VeMo (human-opt view)": 0.0002459726705442051,
      "VeMo (max entropy view)": 0.0012459255716784758,
      "VeMo (min entropy view)": 0.0002459726705442051,
      "VeMo (random view)": 0.0002459726705442051,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person dribbles the ball with one hand, then the other, and proceeds to shoot the ball into the goal with both hands."
  },
  "009096": {
    "text": "a person is standing still, then walks forward and kicks something on the floor with his right foot",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4470850473057723,
      "Minus Multimodal Distance": -7.143044948577881,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9966141581535339,
      "MoBERT-F": 0.7995599728735898,
      "MoBERT-N": 0.6849702175166085,
      "MoBERT-min(F/N)": 0.6849702175166085,
      "MoBERT-max(F/N)": 0.7995599728735898,
      "MotionCritic": -3.9531240463256836,
      "VeMo (human-opt view)": 0.9324618736383442,
      "VeMo (max entropy view)": 0.9146816803811174,
      "VeMo (min entropy view)": 0.9324618736383442,
      "VeMo (random view)": 0.9324618736383442,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing still. Then, he walks forward and kicks something on the floor with his right foot."
  },
  "001843": {
    "text": "a person standing in place lifts and waves with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.22827789560801084,
      "Minus Multimodal Distance": -10.086057662963867,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.256831743987277e-05,
      "MoBERT-F": 0.2608812738522617,
      "MoBERT-N": 0.3572242319304334,
      "MoBERT-min(F/N)": 0.2608812738522617,
      "MoBERT-max(F/N)": 0.3572242319304334,
      "MotionCritic": -8.22423267364502,
      "VeMo (human-opt view)": 0.9896646942800789,
      "VeMo (max entropy view)": 0.9896646942800789,
      "VeMo (min entropy view)": 0.9947753396029259,
      "VeMo (random view)": 0.9947753396029259,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing in place lifts and waves with their right hand."
  },
  "013469": {
    "text": "person has arms out before lowering and running quickly on the spot",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.33625748806704553,
      "Minus Multimodal Distance": -4.468636989593506,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8409525839379057e-05,
      "MoBERT-F": 0.3783499279451771,
      "MoBERT-N": 0.463572657015129,
      "MoBERT-min(F/N)": 0.3783499279451771,
      "MoBERT-max(F/N)": 0.463572657015129,
      "MotionCritic": -7.346320629119873,
      "VeMo (human-opt view)": 0.9240246406570842,
      "VeMo (max entropy view)": 0.5627118644067797,
      "VeMo (min entropy view)": 0.9240246406570842,
      "VeMo (random view)": 0.5627118644067797,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person has their arms out before lowering them and running quickly on the spot."
  },
  "009084": {
    "text": "a person is bent forward with arms dangling in front of them",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.48595283245607535,
      "Minus Multimodal Distance": -8.612373352050781,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.5743574699154124e-05,
      "MoBERT-F": 0.29015913891596445,
      "MoBERT-N": 0.3863755332198204,
      "MoBERT-min(F/N)": 0.29015913891596445,
      "MoBERT-max(F/N)": 0.3863755332198204,
      "MotionCritic": -4.604377269744873,
      "VeMo (human-opt view)": 0.957933868127568,
      "VeMo (max entropy view)": 0.8987654320987655,
      "VeMo (min entropy view)": 0.957933868127568,
      "VeMo (random view)": 0.957933868127568,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is bent forward with their arms dangling in front of them."
  },
  "003539": {
    "text": "a person is pushed by their left arm while walking forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.33541288281712495,
      "Minus Multimodal Distance": -6.994901180267334,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.5176490200683475e-05,
      "MoBERT-F": 0.383404370560362,
      "MoBERT-N": 0.3672009195493945,
      "MoBERT-min(F/N)": 0.3672009195493945,
      "MoBERT-max(F/N)": 0.383404370560362,
      "MotionCritic": -3.695589780807495,
      "VeMo (human-opt view)": 0.24538258575197888,
      "VeMo (max entropy view)": 0.4231678486997636,
      "VeMo (min entropy view)": 0.24538258575197888,
      "VeMo (random view)": 0.24538258575197888,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is pushed by their left arm while walking forward."
  },
  "009135": {
    "text": "a person walks in a counter clockwise circle then sits down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1828729379266352,
      "Minus Multimodal Distance": -8.46824836730957,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8515100479125977,
      "MoBERT-F": 0.47092379639317955,
      "MoBERT-N": 0.5678610789563306,
      "MoBERT-min(F/N)": 0.47092379639317955,
      "MoBERT-max(F/N)": 0.5678610789563306,
      "MotionCritic": -10.728397369384766,
      "VeMo (human-opt view)": 0.9151157512482978,
      "VeMo (max entropy view)": 0.9151157512482978,
      "VeMo (min entropy view)": 0.9364341085271318,
      "VeMo (random view)": 0.9364341085271318,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in a counter - clockwise circle and then sits down."
  },
  "011372": {
    "text": "a man waves and then beckons with his right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2887106399650892,
      "Minus Multimodal Distance": -4.751346111297607,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.007744271599222e-05,
      "MoBERT-F": 0.31315345663477245,
      "MoBERT-N": 0.36830787402453996,
      "MoBERT-min(F/N)": 0.31315345663477245,
      "MoBERT-max(F/N)": 0.36830787402453996,
      "MotionCritic": -4.1915082931518555,
      "VeMo (human-opt view)": 0.9282156460591855,
      "VeMo (max entropy view)": 0.9282156460591855,
      "VeMo (min entropy view)": 0.955329500221141,
      "VeMo (random view)": 0.955329500221141,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man waves and then beckons with his right hand."
  },
  "007693": {
    "text": "the person claps and puts their hands down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.21590496757215033,
      "Minus Multimodal Distance": -8.774813652038574,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.0839055398246273e-05,
      "MoBERT-F": 0.32182724193644785,
      "MoBERT-N": 0.42304493832365553,
      "MoBERT-min(F/N)": 0.32182724193644785,
      "MoBERT-max(F/N)": 0.42304493832365553,
      "MotionCritic": -3.0276241302490234,
      "VeMo (human-opt view)": 0.9985006026989152,
      "VeMo (max entropy view)": 0.9198012775017743,
      "VeMo (min entropy view)": 0.9985006026989152,
      "VeMo (random view)": 0.9198012775017743,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person claps and puts their hands down."
  },
  "001059": {
    "text": "the person is holding their head while walking.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1858492495040838,
      "Minus Multimodal Distance": -4.819677352905273,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.406634303042665e-05,
      "MoBERT-F": 0.42790557345119284,
      "MoBERT-N": 0.49873938933130485,
      "MoBERT-min(F/N)": 0.42790557345119284,
      "MoBERT-max(F/N)": 0.49873938933130485,
      "MotionCritic": -4.859267711639404,
      "VeMo (human-opt view)": 0.8992248062015504,
      "VeMo (max entropy view)": 0.8992248062015504,
      "VeMo (min entropy view)": 0.8992460589444825,
      "VeMo (random view)": 0.8992460589444825,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is holding their head while walking."
  },
  "014279": {
    "text": "stay stand on the place and run.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4168391182501533,
      "Minus Multimodal Distance": -3.847182512283325,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.003050361294299364,
      "MoBERT-F": 0.5253253495343676,
      "MoBERT-N": 0.6928580244496916,
      "MoBERT-min(F/N)": 0.5253253495343676,
      "MoBERT-max(F/N)": 0.6928580244496916,
      "MotionCritic": -3.534972906112671,
      "VeMo (human-opt view)": 0.7425414364640884,
      "VeMo (max entropy view)": 0.7425414364640884,
      "VeMo (min entropy view)": 0.23383084577114427,
      "VeMo (random view)": 0.23383084577114427,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stays standing on the place and runs."
  },
  "008357": {
    "text": "raising hands above head and stretching.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4215703306799276,
      "Minus Multimodal Distance": -7.1408915519714355,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00040867668576538563,
      "MoBERT-F": 0.5145928197537578,
      "MoBERT-N": 0.5165385308423295,
      "MoBERT-min(F/N)": 0.5145928197537578,
      "MoBERT-max(F/N)": 0.5165385308423295,
      "MotionCritic": -6.028029918670654,
      "VeMo (human-opt view)": 0.9995119570522206,
      "VeMo (max entropy view)": 0.9995119570522206,
      "VeMo (min entropy view)": 0.9995424457798249,
      "VeMo (random view)": 0.9995119570522206,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is raising hands above the head and stretching."
  },
  "009816": {
    "text": "a person slowly jumped forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.18433853477196263,
      "Minus Multimodal Distance": -5.349155902862549,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.7111334424698725e-05,
      "MoBERT-F": 0.5694535402317962,
      "MoBERT-N": 0.5485844913704773,
      "MoBERT-min(F/N)": 0.5485844913704773,
      "MoBERT-max(F/N)": 0.5694535402317962,
      "MotionCritic": -6.572849273681641,
      "VeMo (human-opt view)": 0.6924564796905223,
      "VeMo (max entropy view)": 0.6924564796905223,
      "VeMo (min entropy view)": 0.07571288102261553,
      "VeMo (random view)": 0.6924564796905223,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumped forward slowly."
  },
  "006053": {
    "text": "waving arms around randomly quickly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7622101005992585,
      "Minus Multimodal Distance": -2.4590916633605957,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00011809418356278911,
      "MoBERT-F": 0.5546334494819931,
      "MoBERT-N": 0.5551619248324884,
      "MoBERT-min(F/N)": 0.5546334494819931,
      "MoBERT-max(F/N)": 0.5551619248324884,
      "MotionCritic": -4.48065710067749,
      "VeMo (human-opt view)": 0.9323583180987203,
      "VeMo (max entropy view)": 0.9323583180987203,
      "VeMo (min entropy view)": 0.9668485102811583,
      "VeMo (random view)": 0.9323583180987203,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is waving arms around randomly quickly."
  },
  "010703": {
    "text": "hands are on the knees, hands go to the chest in a scratching motion, and then hands go back down to the knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8309968851281107,
      "Minus Multimodal Distance": -7.437653064727783,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.838187694782391e-05,
      "MoBERT-F": 0.2541655687013978,
      "MoBERT-N": 0.35688457086748326,
      "MoBERT-min(F/N)": 0.2541655687013978,
      "MoBERT-max(F/N)": 0.35688457086748326,
      "MotionCritic": -4.917845726013184,
      "VeMo (human-opt view)": 0.7302158273381295,
      "VeMo (max entropy view)": 0.7302158273381295,
      "VeMo (min entropy view)": 0.039652362846279196,
      "VeMo (random view)": 0.039652362846279196,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person's hands are on the knees, hands go to the chest in a scratching motion, and then hands go back down to the knees."
  },
  "012492": {
    "text": "a person stands with legs shoulder-width apart, slightly bent at the knees, arms outstretched at shoulder height, lowers left arm for several seconds, then brings with arm back to shoulder height.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.21645038643298628,
      "Minus Multimodal Distance": -5.675901412963867,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8775830287486315e-05,
      "MoBERT-F": 0.3046867162162483,
      "MoBERT-N": 0.3967241634185819,
      "MoBERT-min(F/N)": 0.3046867162162483,
      "MoBERT-max(F/N)": 0.3967241634185819,
      "MotionCritic": -7.101635456085205,
      "VeMo (human-opt view)": 0.8991596638655462,
      "VeMo (max entropy view)": 0.8991596638655462,
      "VeMo (min entropy view)": 0.8991596638655462,
      "VeMo (random view)": 0.8991596638655462,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands with their legs shoulder - width apart, knees slightly bent, arms outstretched at shoulder height. They lower their left arm for several seconds, then bring the arm back to shoulder height."
  },
  "001225": {
    "text": "person walks five steps forward whilst holding right hand extended to the right hand side holding onto something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4485518947432895,
      "Minus Multimodal Distance": -8.030966758728027,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.731656008516438e-05,
      "MoBERT-F": 0.2694848608635634,
      "MoBERT-N": 0.43388615482069226,
      "MoBERT-min(F/N)": 0.2694848608635634,
      "MoBERT-max(F/N)": 0.43388615482069226,
      "MotionCritic": 1.9836934804916382,
      "VeMo (human-opt view)": 0.34933333333333333,
      "VeMo (max entropy view)": 0.453416149068323,
      "VeMo (min entropy view)": 0.34933333333333333,
      "VeMo (random view)": 0.453416149068323,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks five steps forward while holding their right hand extended to the right side, grasping something."
  },
  "007777": {
    "text": "a person leans their body moving their right arm above their head past their left shoulder.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7389922116967863,
      "Minus Multimodal Distance": -9.023843765258789,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.830940113402903e-05,
      "MoBERT-F": 0.2988676325252051,
      "MoBERT-N": 0.3410386211110886,
      "MoBERT-min(F/N)": 0.2988676325252051,
      "MoBERT-max(F/N)": 0.3410386211110886,
      "MotionCritic": -4.55773401260376,
      "VeMo (human-opt view)": 0.9577304964539007,
      "VeMo (max entropy view)": 0.9577304964539007,
      "VeMo (min entropy view)": 0.9808636748518205,
      "VeMo (random view)": 0.9577304964539007,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person leans their body while moving their right arm above their head and past their left shoulder."
  },
  "000813": {
    "text": "a standing person lifts their left hand and slowly touches their head",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.37939953030774154,
      "Minus Multimodal Distance": -11.295703887939453,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.221367220859975e-05,
      "MoBERT-F": 0.28444216004732903,
      "MoBERT-N": 0.35699597223255725,
      "MoBERT-min(F/N)": 0.28444216004732903,
      "MoBERT-max(F/N)": 0.35699597223255725,
      "MotionCritic": -14.488313674926758,
      "VeMo (human-opt view)": 0.9830022527134958,
      "VeMo (max entropy view)": 0.981936887921654,
      "VeMo (min entropy view)": 0.9830022527134958,
      "VeMo (random view)": 0.9830022527134958,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A standing person lifts their left hand and slowly touches their head."
  },
  "002246": {
    "text": "a person walks up stairs turns left and walks back down stairs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6945929817089399,
      "Minus Multimodal Distance": -8.835599899291992,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.418988151475787e-05,
      "MoBERT-F": 0.40378017247317655,
      "MoBERT-N": 0.5212836477641891,
      "MoBERT-min(F/N)": 0.40378017247317655,
      "MoBERT-max(F/N)": 0.5212836477641891,
      "MotionCritic": -12.773216247558594,
      "VeMo (human-opt view)": 0.0010343796418399213,
      "VeMo (max entropy view)": 0.0010343796418399213,
      "VeMo (min entropy view)": 0.0009119345760484796,
      "VeMo (random view)": 0.0010343796418399213,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks up the stairs, turns left, and walks back down the stairs."
  },
  "004841": {
    "text": "a person struggling to push arms forward and upwards, arms shaking.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7257139147437388,
      "Minus Multimodal Distance": -4.276540756225586,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0034738751128315926,
      "MoBERT-F": 0.5810936649520455,
      "MoBERT-N": 0.6545784006952046,
      "MoBERT-min(F/N)": 0.5810936649520455,
      "MoBERT-max(F/N)": 0.6545784006952046,
      "MotionCritic": -4.26434326171875,
      "VeMo (human-opt view)": 0.8523908523908524,
      "VeMo (max entropy view)": 0.8267148014440433,
      "VeMo (min entropy view)": 0.8523908523908524,
      "VeMo (random view)": 0.8267148014440433,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is struggling to push their arms forward and upwards, with their arms shaking."
  },
  "011252": {
    "text": "the person is walking up the stairs and stood still.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6614381467210726,
      "Minus Multimodal Distance": -7.183590412139893,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.3481140732765198,
      "MoBERT-F": 0.7029253141116992,
      "MoBERT-N": 0.6345922011677485,
      "MoBERT-min(F/N)": 0.6345922011677485,
      "MoBERT-max(F/N)": 0.7029253141116992,
      "MotionCritic": -3.0884287357330322,
      "VeMo (human-opt view)": 0.022963438101347017,
      "VeMo (max entropy view)": 0.022963438101347017,
      "VeMo (min entropy view)": 0.004888391542493669,
      "VeMo (random view)": 0.022963438101347017,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was walking up the stairs and then stood still."
  },
  "001840": {
    "text": "someone execures a roundhouse kick with their left foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7617449039326566,
      "Minus Multimodal Distance": -9.472800254821777,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9843600392341614,
      "MoBERT-F": 0.8537210691288726,
      "MoBERT-N": 0.6180733679418711,
      "MoBERT-min(F/N)": 0.6180733679418711,
      "MoBERT-max(F/N)": 0.8537210691288726,
      "MotionCritic": -2.8846547603607178,
      "VeMo (human-opt view)": 0.928537170263789,
      "VeMo (max entropy view)": 0.8740655549166187,
      "VeMo (min entropy view)": 0.928537170263789,
      "VeMo (random view)": 0.928537170263789,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone executes a roundhouse kick with their left foot."
  },
  "012673": {
    "text": "a person slowly walked forward and sat on something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7924019947248437,
      "Minus Multimodal Distance": -3.30013108253479,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 7.066365651553497e-05,
      "MoBERT-F": 0.42093126233121475,
      "MoBERT-N": 0.4810670647226945,
      "MoBERT-min(F/N)": 0.42093126233121475,
      "MoBERT-max(F/N)": 0.4810670647226945,
      "MotionCritic": -6.022299289703369,
      "VeMo (human-opt view)": 0.960256068284876,
      "VeMo (max entropy view)": 0.8181818181818182,
      "VeMo (min entropy view)": 0.960256068284876,
      "VeMo (random view)": 0.8181818181818182,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walked forward slowly and sat on something."
  },
  "014326": {
    "text": "person lunges forward with left foot first repeatedly",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5863069877693667,
      "Minus Multimodal Distance": -2.2390334606170654,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.2938351068878546e-05,
      "MoBERT-F": 0.5132776960644087,
      "MoBERT-N": 0.6038306334976747,
      "MoBERT-min(F/N)": 0.5132776960644087,
      "MoBERT-max(F/N)": 0.6038306334976747,
      "MotionCritic": -6.674098491668701,
      "VeMo (human-opt view)": 0.9045362220717671,
      "VeMo (max entropy view)": 0.8806660499537465,
      "VeMo (min entropy view)": 0.9045362220717671,
      "VeMo (random view)": 0.8806660499537465,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lunges forward with left foot first repeatedly"
  },
  "008474": {
    "text": "a person turns right while walking then stops",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.25746887432293336,
      "Minus Multimodal Distance": -4.823400497436523,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0028263465501368046,
      "MoBERT-F": 0.46357397916212184,
      "MoBERT-N": 0.5698520427096853,
      "MoBERT-min(F/N)": 0.46357397916212184,
      "MoBERT-max(F/N)": 0.5698520427096853,
      "MotionCritic": -1.2574809789657593,
      "VeMo (human-opt view)": 0.9724845995893224,
      "VeMo (max entropy view)": 0.9724845995893224,
      "VeMo (min entropy view)": 0.9896907216494846,
      "VeMo (random view)": 0.9896907216494846,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person turns right while walking, then stops."
  },
  "003363": {
    "text": "a person hammers a nail into a wall.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7117598299837583,
      "Minus Multimodal Distance": -7.407304763793945,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.007234715856611729,
      "MoBERT-F": 0.5976603749864484,
      "MoBERT-N": 0.5938739671307871,
      "MoBERT-min(F/N)": 0.5938739671307871,
      "MoBERT-max(F/N)": 0.5976603749864484,
      "MotionCritic": -0.4876459836959839,
      "VeMo (human-opt view)": 2.4366154701425895e-05,
      "VeMo (max entropy view)": 0.0015925119438395788,
      "VeMo (min entropy view)": 2.4366154701425895e-05,
      "VeMo (random view)": 0.0015925119438395788,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person hammers a nail into a wall."
  },
  "002922": {
    "text": "someone completes a full jumping jack before stopping.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6912206119667588,
      "Minus Multimodal Distance": -6.372330665588379,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.966823935508728,
      "MoBERT-F": 0.8411121254847231,
      "MoBERT-N": 0.8095582914738442,
      "MoBERT-min(F/N)": 0.8095582914738442,
      "MoBERT-max(F/N)": 0.8411121254847231,
      "MotionCritic": -8.517417907714844,
      "VeMo (human-opt view)": 0.9282836422240129,
      "VeMo (max entropy view)": 0.9282836422240129,
      "VeMo (min entropy view)": 0.9363519863306279,
      "VeMo (random view)": 0.9282836422240129,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone completes a full jumping jack before stopping."
  },
  "009910": {
    "text": "the person stand and touch the head.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.24359341223965048,
      "Minus Multimodal Distance": -5.614549160003662,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.815746822510846e-05,
      "MoBERT-F": 0.28945838434433585,
      "MoBERT-N": 0.3784839397312973,
      "MoBERT-min(F/N)": 0.28945838434433585,
      "MoBERT-max(F/N)": 0.3784839397312973,
      "MotionCritic": -4.145504951477051,
      "VeMo (human-opt view)": 0.9924491967769726,
      "VeMo (max entropy view)": 0.9924491967769726,
      "VeMo (min entropy view)": 0.9933114075606962,
      "VeMo (random view)": 0.9924491967769726,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person stands and touches the head."
  },
  "009712": {
    "text": "a person is performing a ballet dance.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.261935009506411,
      "Minus Multimodal Distance": -8.45434284210205,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9704421758651733,
      "MoBERT-F": 0.8981508281293605,
      "MoBERT-N": 0.7371335717071349,
      "MoBERT-min(F/N)": 0.7371335717071349,
      "MoBERT-max(F/N)": 0.8981508281293605,
      "MotionCritic": -7.6429266929626465,
      "VeMo (human-opt view)": 0.8080808080808081,
      "VeMo (max entropy view)": 0.8080808080808081,
      "VeMo (min entropy view)": 0.8176100628930818,
      "VeMo (random view)": 0.8176100628930818,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is performing a ballet dance."
  },
  "003790": {
    "text": "a person with their shoulders held high",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.37097527680273884,
      "Minus Multimodal Distance": -6.0402655601501465,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.6227483835536987e-05,
      "MoBERT-F": 0.38294066671948057,
      "MoBERT-N": 0.4913042363467932,
      "MoBERT-min(F/N)": 0.38294066671948057,
      "MoBERT-max(F/N)": 0.4913042363467932,
      "MotionCritic": -6.981910228729248,
      "VeMo (human-opt view)": 0.30700179533213645,
      "VeMo (max entropy view)": 0.3922413793103448,
      "VeMo (min entropy view)": 0.30700179533213645,
      "VeMo (random view)": 0.30700179533213645,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person with their shoulders held high."
  },
  "006726": {
    "text": "slowly wake on the left side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7861618389697933,
      "Minus Multimodal Distance": -8.863879203796387,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.490157748979982e-05,
      "MoBERT-F": 0.3374329652232504,
      "MoBERT-N": 0.44521462809641216,
      "MoBERT-min(F/N)": 0.3374329652232504,
      "MoBERT-max(F/N)": 0.44521462809641216,
      "MotionCritic": -7.9009575843811035,
      "VeMo (human-opt view)": 0.005533770660021256,
      "VeMo (max entropy view)": 0.008040298362879008,
      "VeMo (min entropy view)": 0.005533770660021256,
      "VeMo (random view)": 0.005533770660021256,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person slowly wakes on the left side."
  },
  "004499": {
    "text": "a man is doing jumping jacks.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5757868305011546,
      "Minus Multimodal Distance": -3.45023250579834,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9452320337295532,
      "MoBERT-F": 0.83139672223538,
      "MoBERT-N": 0.7931396663733183,
      "MoBERT-min(F/N)": 0.7931396663733183,
      "MoBERT-max(F/N)": 0.83139672223538,
      "MotionCritic": -5.583404064178467,
      "VeMo (human-opt view)": 0.8873642081189251,
      "VeMo (max entropy view)": 0.8873642081189251,
      "VeMo (min entropy view)": 0.8936454849498328,
      "VeMo (random view)": 0.8936454849498328,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is doing jumping jacks."
  },
  "013632": {
    "text": "a person shakes arms to lossen up then walks backwads, then moves forward with the right arm in a swiming motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6303091923743294,
      "Minus Multimodal Distance": -7.581414699554443,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00035107461735606194,
      "MoBERT-F": 0.4274992127263244,
      "MoBERT-N": 0.5173020174688274,
      "MoBERT-min(F/N)": 0.4274992127263244,
      "MoBERT-max(F/N)": 0.5173020174688274,
      "MotionCritic": -4.064521312713623,
      "VeMo (human-opt view)": 0.6220095693779905,
      "VeMo (max entropy view)": 0.6220095693779905,
      "VeMo (min entropy view)": 0.8266129032258065,
      "VeMo (random view)": 0.6220095693779905,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person shakes their arms to loosen up, then walks backward, then moves forward with the right arm in a swimming motion."
  },
  "003807": {
    "text": "the person walks in a straight line and places their right hand to support their weight against something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5443698832297914,
      "Minus Multimodal Distance": -2.653869152069092,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00010513410961721092,
      "MoBERT-F": 0.22849571616832304,
      "MoBERT-N": 0.34588682872974746,
      "MoBERT-min(F/N)": 0.22849571616832304,
      "MoBERT-max(F/N)": 0.34588682872974746,
      "MotionCritic": -6.9135613441467285,
      "VeMo (human-opt view)": 0.6789473684210526,
      "VeMo (max entropy view)": 0.6789473684210526,
      "VeMo (min entropy view)": 0.7061611374407583,
      "VeMo (random view)": 0.6789473684210526,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walks in a straight line and places their right hand against something to support their weight."
  },
  "010706": {
    "text": "a man throws an object with his right hand while lifting his right leg off the ground.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5727679952022479,
      "Minus Multimodal Distance": -1.846989631652832,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00021604952053166926,
      "MoBERT-F": 0.45697162694024485,
      "MoBERT-N": 0.511204276350509,
      "MoBERT-min(F/N)": 0.45697162694024485,
      "MoBERT-max(F/N)": 0.511204276350509,
      "MotionCritic": -6.256444931030273,
      "VeMo (human-opt view)": 0.7431906614785992,
      "VeMo (max entropy view)": 0.7431906614785992,
      "VeMo (min entropy view)": 0.7985480943738656,
      "VeMo (random view)": 0.7985480943738656,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man throws an object with his right hand while lifting his right leg off the ground."
  },
  "008838": {
    "text": "person picked up his phone made a call and then put it back down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.43913660405077104,
      "Minus Multimodal Distance": -10.034561157226562,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6969351893058047e-05,
      "MoBERT-F": 0.273465231342973,
      "MoBERT-N": 0.4335731655969545,
      "MoBERT-min(F/N)": 0.273465231342973,
      "MoBERT-max(F/N)": 0.4335731655969545,
      "MotionCritic": -8.122944831848145,
      "VeMo (human-opt view)": 2.2039987318530372e-05,
      "VeMo (max entropy view)": 4.5546549484504156e-05,
      "VeMo (min entropy view)": 2.2039987318530372e-05,
      "VeMo (random view)": 2.2039987318530372e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person picked up his phone, made a call, and then put it back down."
  },
  "012579": {
    "text": "a person steps forward, then kneels down using their left hand for support.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4329812312812755,
      "Minus Multimodal Distance": -4.541685104370117,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.438206684018951e-05,
      "MoBERT-F": 0.340862485638243,
      "MoBERT-N": 0.4908758539192309,
      "MoBERT-min(F/N)": 0.340862485638243,
      "MoBERT-max(F/N)": 0.4908758539192309,
      "MotionCritic": -5.963098049163818,
      "VeMo (human-opt view)": 0.9282442748091603,
      "VeMo (max entropy view)": 0.8991060025542784,
      "VeMo (min entropy view)": 0.9282442748091603,
      "VeMo (random view)": 0.9282442748091603,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps forward and then kneels down, using their left hand for support."
  },
  "005312": {
    "text": "a person is dancing with expressive arm movement and then jumps forward",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.921287642105723,
      "Minus Multimodal Distance": -3.770261526107788,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0002338609629077837,
      "MoBERT-F": 0.507757257738243,
      "MoBERT-N": 0.5972197916418978,
      "MoBERT-min(F/N)": 0.507757257738243,
      "MoBERT-max(F/N)": 0.5972197916418978,
      "MotionCritic": -5.108991622924805,
      "VeMo (human-opt view)": 0.8736196319018404,
      "VeMo (max entropy view)": 0.8519362186788155,
      "VeMo (min entropy view)": 0.8736196319018404,
      "VeMo (random view)": 0.8736196319018404,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is dancing with expressive arm movements and then jumps forward."
  },
  "002087": {
    "text": "a person walks forward while holding out their arms for balance",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4744338171852147,
      "Minus Multimodal Distance": -11.382946014404297,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.3708019802579656e-05,
      "MoBERT-F": 0.3790693941377064,
      "MoBERT-N": 0.48038381510508504,
      "MoBERT-min(F/N)": 0.3790693941377064,
      "MoBERT-max(F/N)": 0.48038381510508504,
      "MotionCritic": -9.039375305175781,
      "VeMo (human-opt view)": 0.9796727813584531,
      "VeMo (max entropy view)": 0.9603985056039851,
      "VeMo (min entropy view)": 0.9796727813584531,
      "VeMo (random view)": 0.9796727813584531,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward while holding out their arms for balance."
  },
  "006213": {
    "text": "a person winds up his arm and then pitches a ball.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.819345449906319,
      "Minus Multimodal Distance": -3.662170886993408,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.209796604001895e-05,
      "MoBERT-F": 0.5196244459497183,
      "MoBERT-N": 0.5308601970342521,
      "MoBERT-min(F/N)": 0.5196244459497183,
      "MoBERT-max(F/N)": 0.5308601970342521,
      "MotionCritic": -7.249910831451416,
      "VeMo (human-opt view)": 0.8602150537634409,
      "VeMo (max entropy view)": 0.8599605522682445,
      "VeMo (min entropy view)": 0.8602150537634409,
      "VeMo (random view)": 0.8599605522682445,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person winds up his arm and then pitches a ball."
  },
  "003245": {
    "text": "a person raises their arms high above their head.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3346815081019696,
      "Minus Multimodal Distance": -7.492127418518066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0016138771316036582,
      "MoBERT-F": 0.5319553499919766,
      "MoBERT-N": 0.5484245320063088,
      "MoBERT-min(F/N)": 0.5319553499919766,
      "MoBERT-max(F/N)": 0.5484245320063088,
      "MotionCritic": -6.816473484039307,
      "VeMo (human-opt view)": 0.9990306783992347,
      "VeMo (max entropy view)": 0.9990306783992347,
      "VeMo (min entropy view)": 0.9991951624276181,
      "VeMo (random view)": 0.9991951624276181,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises their arms high above their head."
  },
  "008500": {
    "text": "a person jumps straight up with both arms down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6451418298890371,
      "Minus Multimodal Distance": -5.489657402038574,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00047349854139611125,
      "MoBERT-F": 0.5374177352175169,
      "MoBERT-N": 0.6529001232877655,
      "MoBERT-min(F/N)": 0.5374177352175169,
      "MoBERT-max(F/N)": 0.6529001232877655,
      "MotionCritic": -9.562276840209961,
      "VeMo (human-opt view)": 0.07179115300942712,
      "VeMo (max entropy view)": 0.07179115300942712,
      "VeMo (min entropy view)": 0.031145878603505723,
      "VeMo (random view)": 0.07179115300942712,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps straight up with both arms down."
  },
  "002917": {
    "text": "a man moves his hand up in the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9736348136674012,
      "Minus Multimodal Distance": -3.9168365001678467,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8236501748324372e-05,
      "MoBERT-F": 0.37565066579256495,
      "MoBERT-N": 0.4014265739683539,
      "MoBERT-min(F/N)": 0.37565066579256495,
      "MoBERT-max(F/N)": 0.4014265739683539,
      "MotionCritic": -9.721948623657227,
      "VeMo (human-opt view)": 0.9497231450719823,
      "VeMo (max entropy view)": 0.9497231450719823,
      "VeMo (min entropy view)": 0.9875370919881306,
      "VeMo (random view)": 0.9497231450719823,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man moves his hand up in the air."
  },
  "007409": {
    "text": "the person is trying to hit a bug.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8759104166916782,
      "Minus Multimodal Distance": -7.82204532623291,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001086322299670428,
      "MoBERT-F": 0.5267624406450855,
      "MoBERT-N": 0.5945489735409817,
      "MoBERT-min(F/N)": 0.5267624406450855,
      "MoBERT-max(F/N)": 0.5945489735409817,
      "MotionCritic": -1.0600823163986206,
      "VeMo (human-opt view)": 0.8174904942965779,
      "VeMo (max entropy view)": 0.8174904942965779,
      "VeMo (min entropy view)": 0.8595988538681948,
      "VeMo (random view)": 0.8174904942965779,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is trying to hit a bug."
  },
  "014295": {
    "text": "person down on hands and knees stands up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5415376847937323,
      "Minus Multimodal Distance": -3.398658514022827,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.162586603546515e-05,
      "MoBERT-F": 0.4404348492229996,
      "MoBERT-N": 0.4881899065789655,
      "MoBERT-min(F/N)": 0.4404348492229996,
      "MoBERT-max(F/N)": 0.4881899065789655,
      "MotionCritic": -4.622334003448486,
      "VeMo (human-opt view)": 0.99954044281363,
      "VeMo (max entropy view)": 0.9989036507566537,
      "VeMo (min entropy view)": 0.99954044281363,
      "VeMo (random view)": 0.99954044281363,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who is down on their hands and knees stands up."
  },
  "002318": {
    "text": "pacing back and forth from left to right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6267818138379744,
      "Minus Multimodal Distance": -2.390760660171509,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.3216068148612976,
      "MoBERT-F": 0.6373802320382322,
      "MoBERT-N": 0.7324615038993908,
      "MoBERT-min(F/N)": 0.6373802320382322,
      "MoBERT-max(F/N)": 0.7324615038993908,
      "MotionCritic": -12.073614120483398,
      "VeMo (human-opt view)": 0.9770385999751768,
      "VeMo (max entropy view)": 0.9740847387906212,
      "VeMo (min entropy view)": 0.9770385999751768,
      "VeMo (random view)": 0.9740847387906212,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Pacing back and forth from left to right."
  },
  "013619": {
    "text": "a man swings his right arm back repeatedly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3271543680289239,
      "Minus Multimodal Distance": -8.449342727661133,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00423180079087615,
      "MoBERT-F": 0.5306828852151895,
      "MoBERT-N": 0.4826000017208908,
      "MoBERT-min(F/N)": 0.4826000017208908,
      "MoBERT-max(F/N)": 0.5306828852151895,
      "MotionCritic": -3.7576730251312256,
      "VeMo (human-opt view)": 0.7773722627737226,
      "VeMo (max entropy view)": 0.7773722627737226,
      "VeMo (min entropy view)": 0.859538784067086,
      "VeMo (random view)": 0.7773722627737226,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man swings his right arm back repeatedly."
  },
  "000700": {
    "text": "a person jogs in a straight line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6200999933705619,
      "Minus Multimodal Distance": -3.5190906524658203,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.0940569053636864e-05,
      "MoBERT-F": 0.5466354469155459,
      "MoBERT-N": 0.6495341486132168,
      "MoBERT-min(F/N)": 0.5466354469155459,
      "MoBERT-max(F/N)": 0.6495341486132168,
      "MotionCritic": -8.240560531616211,
      "VeMo (human-opt view)": 0.9819494584837545,
      "VeMo (max entropy view)": 0.9495733126454616,
      "VeMo (min entropy view)": 0.9819494584837545,
      "VeMo (random view)": 0.9495733126454616,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jogs in a straight line."
  },
  "000246": {
    "text": "a person cautiously walks in an arc",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.49415605778604893,
      "Minus Multimodal Distance": -5.438192367553711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4398323148489e-05,
      "MoBERT-F": 0.395474913787821,
      "MoBERT-N": 0.5435186000654,
      "MoBERT-min(F/N)": 0.395474913787821,
      "MoBERT-max(F/N)": 0.5435186000654,
      "MotionCritic": -1.703070044517517,
      "VeMo (human-opt view)": 0.9362865736942849,
      "VeMo (max entropy view)": 0.9281553398058252,
      "VeMo (min entropy view)": 0.9362865736942849,
      "VeMo (random view)": 0.9281553398058252,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person cautiously walks in an arc."
  },
  "009831": {
    "text": "a person stiffly walks forward and backwards.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8627107784174102,
      "Minus Multimodal Distance": -6.81668758392334,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9464147686958313,
      "MoBERT-F": 0.6724754556045636,
      "MoBERT-N": 0.5753386815989301,
      "MoBERT-min(F/N)": 0.5753386815989301,
      "MoBERT-max(F/N)": 0.6724754556045636,
      "MotionCritic": -12.78560733795166,
      "VeMo (human-opt view)": 0.9399830938292477,
      "VeMo (max entropy view)": 0.835820895522388,
      "VeMo (min entropy view)": 0.9399830938292477,
      "VeMo (random view)": 0.835820895522388,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks stiffly forwards and backwards."
  },
  "002798": {
    "text": "a person is cheering and dancing",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9531900574144635,
      "Minus Multimodal Distance": -10.221463203430176,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0016430830582976341,
      "MoBERT-F": 0.6332061394816435,
      "MoBERT-N": 0.6327424118043004,
      "MoBERT-min(F/N)": 0.6327424118043004,
      "MoBERT-max(F/N)": 0.6332061394816435,
      "MotionCritic": -5.434449672698975,
      "VeMo (human-opt view)": 0.9498327759197325,
      "VeMo (max entropy view)": 0.9324991768192296,
      "VeMo (min entropy view)": 0.9498327759197325,
      "VeMo (random view)": 0.9324991768192296,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is cheering and dancing."
  },
  "012625": {
    "text": "subject kicks out right leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.33198275662814036,
      "Minus Multimodal Distance": -5.404664039611816,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8397824764251709,
      "MoBERT-F": 0.7653509347005221,
      "MoBERT-N": 0.6127799849446685,
      "MoBERT-min(F/N)": 0.6127799849446685,
      "MoBERT-max(F/N)": 0.7653509347005221,
      "MotionCritic": -0.2783729135990143,
      "VeMo (human-opt view)": 0.9940743400969654,
      "VeMo (max entropy view)": 0.9896547969073287,
      "VeMo (min entropy view)": 0.9940743400969654,
      "VeMo (random view)": 0.9940743400969654,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Subject kicks out the right leg."
  },
  "013004": {
    "text": "a man swings his right arm behind him and then swings it forward and down toward the left, as if he is brushing something aside,",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.34923100678844476,
      "Minus Multimodal Distance": -7.187004089355469,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.26801148528466e-05,
      "MoBERT-F": 0.3434167375810124,
      "MoBERT-N": 0.42711072971752245,
      "MoBERT-min(F/N)": 0.3434167375810124,
      "MoBERT-max(F/N)": 0.42711072971752245,
      "MotionCritic": -2.6026806831359863,
      "VeMo (human-opt view)": 0.8809849521203831,
      "VeMo (max entropy view)": 0.8809849521203831,
      "VeMo (min entropy view)": 0.9284253578732107,
      "VeMo (random view)": 0.8809849521203831,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man swings his right arm behind him, then swings it forward and down toward the left, as if he is brushing something aside."
  },
  "006390": {
    "text": "a person bends down and touches their toes, then reaches up and stretches back and forth",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7011524125906907,
      "Minus Multimodal Distance": -2.906498908996582,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9881608486175537,
      "MoBERT-F": 0.604205661437818,
      "MoBERT-N": 0.5491286108027182,
      "MoBERT-min(F/N)": 0.5491286108027182,
      "MoBERT-max(F/N)": 0.604205661437818,
      "MotionCritic": -5.454730987548828,
      "VeMo (human-opt view)": 0.8808757244043787,
      "VeMo (max entropy view)": 0.8808757244043787,
      "VeMo (min entropy view)": 0.9197149643705463,
      "VeMo (random view)": 0.8808757244043787,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends down and touches their toes, then reaches up and stretches back and forth."
  },
  "004472": {
    "text": "a man dodges something to the left, then the right and then the left again.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8157756745348593,
      "Minus Multimodal Distance": -8.806269645690918,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.09136053174734116,
      "MoBERT-F": 0.545913851546699,
      "MoBERT-N": 0.5434845043779726,
      "MoBERT-min(F/N)": 0.5434845043779726,
      "MoBERT-max(F/N)": 0.545913851546699,
      "MotionCritic": -4.7938618659973145,
      "VeMo (human-opt view)": 0.8668866886688669,
      "VeMo (max entropy view)": 0.8589473684210527,
      "VeMo (min entropy view)": 0.8668866886688669,
      "VeMo (random view)": 0.8668866886688669,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man dodges something to the left, then to the right, and then to the left again."
  },
  "010797": {
    "text": "a person walks forward and side kicks then puts his hand to his face",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.628819598732085,
      "Minus Multimodal Distance": -6.176387786865234,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8091533184051514,
      "MoBERT-F": 0.6696529352330013,
      "MoBERT-N": 0.6589397680803339,
      "MoBERT-min(F/N)": 0.6589397680803339,
      "MoBERT-max(F/N)": 0.6696529352330013,
      "MotionCritic": -4.500890254974365,
      "VeMo (human-opt view)": 0.8668171557562077,
      "VeMo (max entropy view)": 0.7066666666666667,
      "VeMo (min entropy view)": 0.8668171557562077,
      "VeMo (random view)": 0.7066666666666667,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, performs a side kick, and then puts his hand to his face."
  },
  "012282": {
    "text": "a man lowers his arms, then moves something in front of his face.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1795207336439241,
      "Minus Multimodal Distance": -7.40179967880249,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.1020230128197e-05,
      "MoBERT-F": 0.26500171052222177,
      "MoBERT-N": 0.34230390536326516,
      "MoBERT-min(F/N)": 0.26500171052222177,
      "MoBERT-max(F/N)": 0.34230390536326516,
      "MotionCritic": -8.998176574707031,
      "VeMo (human-opt view)": 0.9239189928845101,
      "VeMo (max entropy view)": 0.09517923362175525,
      "VeMo (min entropy view)": 0.9239189928845101,
      "VeMo (random view)": 0.9239189928845101,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man lowers his arms and then moves something in front of his face."
  },
  "004176": {
    "text": "a person walks forward and moves something with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.29389858210927516,
      "Minus Multimodal Distance": -7.134110450744629,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9564875148935243e-05,
      "MoBERT-F": 0.3525471759054316,
      "MoBERT-N": 0.44882468012603177,
      "MoBERT-min(F/N)": 0.3525471759054316,
      "MoBERT-max(F/N)": 0.44882468012603177,
      "MotionCritic": -10.46130657196045,
      "VeMo (human-opt view)": 0.8990578734858681,
      "VeMo (max entropy view)": 0.8872987477638641,
      "VeMo (min entropy view)": 0.8990578734858681,
      "VeMo (random view)": 0.8872987477638641,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and moves something with his right hand."
  },
  "012041": {
    "text": "a person is jogging forward at a steady pace.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7264021330334186,
      "Minus Multimodal Distance": -4.077879428863525,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8313779694144614e-05,
      "MoBERT-F": 0.4781855856988593,
      "MoBERT-N": 0.5202965424698021,
      "MoBERT-min(F/N)": 0.4781855856988593,
      "MoBERT-max(F/N)": 0.5202965424698021,
      "MotionCritic": -8.058177947998047,
      "VeMo (human-opt view)": 0.9526566053338846,
      "VeMo (max entropy view)": 0.8264984227129337,
      "VeMo (min entropy view)": 0.9526566053338846,
      "VeMo (random view)": 0.8264984227129337,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is jogging forward at a steady pace."
  },
  "000825": {
    "text": "a person at a standstill starts running, then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6051564948816651,
      "Minus Multimodal Distance": -9.957234382629395,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.4126496757380664e-05,
      "MoBERT-F": 0.45956473443691265,
      "MoBERT-N": 0.5085130738934288,
      "MoBERT-min(F/N)": 0.45956473443691265,
      "MoBERT-max(F/N)": 0.5085130738934288,
      "MotionCritic": -6.090304851531982,
      "VeMo (human-opt view)": 0.996825865505278,
      "VeMo (max entropy view)": 0.9820321195738592,
      "VeMo (min entropy view)": 0.996825865505278,
      "VeMo (random view)": 0.996825865505278,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person at a standstill starts running and then stops."
  },
  "006735": {
    "text": "the man makes a gesture and ties his laces",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2835193237253073,
      "Minus Multimodal Distance": -5.969911098480225,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7100657462142408e-05,
      "MoBERT-F": 0.4360647756792192,
      "MoBERT-N": 0.49971678390871066,
      "MoBERT-min(F/N)": 0.4360647756792192,
      "MoBERT-max(F/N)": 0.49971678390871066,
      "MotionCritic": -3.1796069145202637,
      "VeMo (human-opt view)": 0.004599554269998578,
      "VeMo (max entropy view)": 0.0757459831675593,
      "VeMo (min entropy view)": 0.004599554269998578,
      "VeMo (random view)": 0.004599554269998578,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man makes a gesture and ties his laces."
  },
  "012371": {
    "text": "swaying back-and-forth with their arms out.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6201976963486955,
      "Minus Multimodal Distance": -12.808050155639648,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0001317022106377408,
      "MoBERT-F": 0.5386664330237627,
      "MoBERT-N": 0.5506002864894673,
      "MoBERT-min(F/N)": 0.5386664330237627,
      "MoBERT-max(F/N)": 0.5506002864894673,
      "MotionCritic": -3.5460400581359863,
      "VeMo (human-opt view)": 0.9497716894977168,
      "VeMo (max entropy view)": 0.9497716894977168,
      "VeMo (min entropy view)": 0.9740501478156137,
      "VeMo (random view)": 0.9740501478156137,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Swaying back and forth with their arms out."
  },
  "001250": {
    "text": "person runs on an exercise machine.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4153920230921218,
      "Minus Multimodal Distance": -9.711097717285156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3074646378518082e-05,
      "MoBERT-F": 0.42649082938378374,
      "MoBERT-N": 0.5648686765198768,
      "MoBERT-min(F/N)": 0.42649082938378374,
      "MoBERT-max(F/N)": 0.5648686765198768,
      "MotionCritic": -5.42492151260376,
      "VeMo (human-opt view)": 0.0006689330975267802,
      "VeMo (max entropy view)": 0.0006689330975267802,
      "VeMo (min entropy view)": 3.1226314131042646e-05,
      "VeMo (random view)": 3.1226314131042646e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs on an exercise machine."
  },
  "007779": {
    "text": "a person sits down and applauds.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3234428785590528,
      "Minus Multimodal Distance": -7.331998825073242,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7513249733601697e-05,
      "MoBERT-F": 0.3384685568540956,
      "MoBERT-N": 0.4422480984255079,
      "MoBERT-min(F/N)": 0.3384685568540956,
      "MoBERT-max(F/N)": 0.4422480984255079,
      "MotionCritic": -7.047402858734131,
      "VeMo (human-opt view)": 0.12582781456953643,
      "VeMo (max entropy view)": 0.12582781456953643,
      "VeMo (min entropy view)": 0.07578558225508318,
      "VeMo (random view)": 0.12582781456953643,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down and applauds."
  },
  "008597": {
    "text": "a person is washing a window",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.26174100919472365,
      "Minus Multimodal Distance": -5.004558563232422,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0403865821426734e-05,
      "MoBERT-F": 0.32257334190836034,
      "MoBERT-N": 0.3649249541233558,
      "MoBERT-min(F/N)": 0.32257334190836034,
      "MoBERT-max(F/N)": 0.3649249541233558,
      "MotionCritic": -3.8131914138793945,
      "VeMo (human-opt view)": 2.4292009386432428e-05,
      "VeMo (max entropy view)": 2.4292009386432428e-05,
      "VeMo (min entropy view)": 7.872306905675087e-06,
      "VeMo (random view)": 7.872306905675087e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is washing a window."
  },
  "002178": {
    "text": "the person tripped over his foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.36285272700166005,
      "Minus Multimodal Distance": -3.8106377124786377,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.200606730999425e-05,
      "MoBERT-F": 0.4876524536290283,
      "MoBERT-N": 0.6206982750948281,
      "MoBERT-min(F/N)": 0.4876524536290283,
      "MoBERT-max(F/N)": 0.6206982750948281,
      "MotionCritic": -9.187089920043945,
      "VeMo (human-opt view)": 0.6924101198402131,
      "VeMo (max entropy view)": 0.5460992907801419,
      "VeMo (min entropy view)": 0.6924101198402131,
      "VeMo (random view)": 0.6924101198402131,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person tripped over his foot."
  },
  "009908": {
    "text": "moving forward on the floor.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.34036826800007747,
      "Minus Multimodal Distance": -12.181674003601074,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3964290448930115e-05,
      "MoBERT-F": 0.4169813360215038,
      "MoBERT-N": 0.5367499804683706,
      "MoBERT-min(F/N)": 0.4169813360215038,
      "MoBERT-max(F/N)": 0.5367499804683706,
      "MotionCritic": -2.0351955890655518,
      "VeMo (human-opt view)": 0.9580386610089581,
      "VeMo (max entropy view)": 0.9433962264150944,
      "VeMo (min entropy view)": 0.9580386610089581,
      "VeMo (random view)": 0.9580386610089581,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is moving forward on the floor."
  },
  "010083": {
    "text": "a person is standing and moves arms in a way that looks like they are picking something up and taking a drink or a bite",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.656484858909346,
      "Minus Multimodal Distance": -8.216687202453613,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.085407610749826e-05,
      "MoBERT-F": 0.2704335014880592,
      "MoBERT-N": 0.35205788606149097,
      "MoBERT-min(F/N)": 0.2704335014880592,
      "MoBERT-max(F/N)": 0.35205788606149097,
      "MotionCritic": -7.406391620635986,
      "VeMo (human-opt view)": 0.7318718381112985,
      "VeMo (max entropy view)": 0.6926994906621392,
      "VeMo (min entropy view)": 0.7318718381112985,
      "VeMo (random view)": 0.6926994906621392,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing and moves their arms in a way that looks like they are picking something up and taking a drink or a bite."
  },
  "000921": {
    "text": "person clasps both hands together then waves arms to side then goes down on right knee bent over ties something on feet then gets up",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4488682204959951,
      "Minus Multimodal Distance": -10.178512573242188,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4324772311956622e-05,
      "MoBERT-F": 0.2996325701227344,
      "MoBERT-N": 0.3743649808456895,
      "MoBERT-min(F/N)": 0.2996325701227344,
      "MoBERT-max(F/N)": 0.3743649808456895,
      "MotionCritic": -7.806381702423096,
      "VeMo (human-opt view)": 0.10112359550561797,
      "VeMo (max entropy view)": 0.13983050847457626,
      "VeMo (min entropy view)": 0.10112359550561797,
      "VeMo (random view)": 0.10112359550561797,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person clasps both hands together, then waves their arms to the side. Next, the person goes down on their right knee, bends over, ties something on their foot, and then gets up."
  },
  "012001": {
    "text": "a man picks up an object moves it a few inches to the right then places it back down",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1030344595362943,
      "Minus Multimodal Distance": -5.01614236831665,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5054672732949257e-05,
      "MoBERT-F": 0.3399524546253718,
      "MoBERT-N": 0.47258732454504687,
      "MoBERT-min(F/N)": 0.3399524546253718,
      "MoBERT-max(F/N)": 0.47258732454504687,
      "MotionCritic": -1.3459887504577637,
      "VeMo (human-opt view)": 0.05339805825242718,
      "VeMo (max entropy view)": 0.060240963855421686,
      "VeMo (min entropy view)": 0.05339805825242718,
      "VeMo (random view)": 0.060240963855421686,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man picks up an object, moves it a few inches to the right, then places it back down."
  },
  "012398": {
    "text": "the person stands and squats a couple of times with their hands out in front of them",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6317034921432596,
      "Minus Multimodal Distance": -13.085084915161133,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0006013365346007049,
      "MoBERT-F": 0.5254934459714153,
      "MoBERT-N": 0.5011068386279663,
      "MoBERT-min(F/N)": 0.5011068386279663,
      "MoBERT-max(F/N)": 0.5254934459714153,
      "MotionCritic": -3.180081844329834,
      "VeMo (human-opt view)": 0.9914246196403873,
      "VeMo (max entropy view)": 0.9147286821705426,
      "VeMo (min entropy view)": 0.9914246196403873,
      "VeMo (random view)": 0.9147286821705426,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person stands and squats a couple of times with their hands out in front of them."
  },
  "008840": {
    "text": "a person who seems to evade something from their left side and run at a insane pace",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.6766944124865062,
      "Minus Multimodal Distance": -8.19778060913086,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00047996154171414673,
      "MoBERT-F": 0.5002375062346511,
      "MoBERT-N": 0.5577389791356391,
      "MoBERT-min(F/N)": 0.5002375062346511,
      "MoBERT-max(F/N)": 0.5577389791356391,
      "MotionCritic": -8.078863143920898,
      "VeMo (human-opt view)": 0.6513761467889908,
      "VeMo (max entropy view)": 0.6513761467889908,
      "VeMo (min entropy view)": 0.7549407114624506,
      "VeMo (random view)": 0.6513761467889908,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who seems to evade something from their left side and run at an insane pace."
  },
  "004818": {
    "text": "the man walk forward and move the right hand to the right side and went back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.674526824469597,
      "Minus Multimodal Distance": -6.5542893409729,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.350693102926016e-05,
      "MoBERT-F": 0.369941432640308,
      "MoBERT-N": 0.46224936010025375,
      "MoBERT-min(F/N)": 0.369941432640308,
      "MoBERT-max(F/N)": 0.46224936010025375,
      "MotionCritic": -11.023258209228516,
      "VeMo (human-opt view)": 0.8076923076923077,
      "VeMo (max entropy view)": 0.8076923076923077,
      "VeMo (min entropy view)": 0.8517745302713987,
      "VeMo (random view)": 0.8076923076923077,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man walked forward, moved his right hand to the right side, and then went back."
  },
  "013980": {
    "text": "a person slowly walked backward, and sat on the knee and hands",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8695958818275592,
      "Minus Multimodal Distance": -7.454138278961182,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.671040268382058e-05,
      "MoBERT-F": 0.39376216391948227,
      "MoBERT-N": 0.49685188513075873,
      "MoBERT-min(F/N)": 0.39376216391948227,
      "MoBERT-max(F/N)": 0.49685188513075873,
      "MotionCritic": -5.146115303039551,
      "VeMo (human-opt view)": 0.9625668449197861,
      "VeMo (max entropy view)": 0.9625668449197861,
      "VeMo (min entropy view)": 0.9830140946873871,
      "VeMo (random view)": 0.9830140946873871,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walked slowly backward and sat on one knee and their hands."
  },
  "001008": {
    "text": "a person walks forward then turns completely around and does a cartwheel.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9615236080858561,
      "Minus Multimodal Distance": -6.528578281402588,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.762779176235199,
      "MoBERT-F": 0.6170273696787026,
      "MoBERT-N": 0.7165897890050729,
      "MoBERT-min(F/N)": 0.6170273696787026,
      "MoBERT-max(F/N)": 0.7165897890050729,
      "MotionCritic": -13.548534393310547,
      "VeMo (human-opt view)": 0.7426470588235294,
      "VeMo (max entropy view)": 0.515625,
      "VeMo (min entropy view)": 0.7426470588235294,
      "VeMo (random view)": 0.515625,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, then turns completely around and does a cartwheel."
  },
  "004444": {
    "text": "the person picks something up, tilts it then puts it back down",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.19692189513313993,
      "Minus Multimodal Distance": -7.926166534423828,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3311860786634497e-05,
      "MoBERT-F": 0.33028404232371533,
      "MoBERT-N": 0.43579378184057943,
      "MoBERT-min(F/N)": 0.33028404232371533,
      "MoBERT-max(F/N)": 0.43579378184057943,
      "MotionCritic": -3.302962303161621,
      "VeMo (human-opt view)": 0.11911911911911911,
      "VeMo (max entropy view)": 0.11911911911911911,
      "VeMo (min entropy view)": 0.03733650416171225,
      "VeMo (random view)": 0.03733650416171225,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person picks something up, tilts it, then puts it back down."
  },
  "007666": {
    "text": "the person lifts his right hand up and puts it back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2436339705780512,
      "Minus Multimodal Distance": -5.8689188957214355,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.6344855945790187e-05,
      "MoBERT-F": 0.2778695502967147,
      "MoBERT-N": 0.3940502685260864,
      "MoBERT-min(F/N)": 0.2778695502967147,
      "MoBERT-max(F/N)": 0.3940502685260864,
      "MotionCritic": -8.377153396606445,
      "VeMo (human-opt view)": 0.9706591070163005,
      "VeMo (max entropy view)": 0.8807692307692307,
      "VeMo (min entropy view)": 0.9706591070163005,
      "VeMo (random view)": 0.8807692307692307,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person lifts his right hand up and puts it back down."
  },
  "011127": {
    "text": "a person touches toward his pelvis, then extends hand and raises it.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.21888993127575293,
      "Minus Multimodal Distance": -9.30368423461914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 8.160052675521001e-05,
      "MoBERT-F": 0.2613599303488517,
      "MoBERT-N": 0.3535695373398757,
      "MoBERT-min(F/N)": 0.2613599303488517,
      "MoBERT-max(F/N)": 0.3535695373398757,
      "MotionCritic": -6.976520538330078,
      "VeMo (human-opt view)": 0.9242310577644411,
      "VeMo (max entropy view)": 0.8806539509536785,
      "VeMo (min entropy view)": 0.9242310577644411,
      "VeMo (random view)": 0.8806539509536785,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person touches his pelvis, then extends his hand and raises it."
  },
  "008461": {
    "text": "person walks in a semi circular shape while swinging arms slightly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2246738267117778,
      "Minus Multimodal Distance": -5.743741035461426,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.621076226234436,
      "MoBERT-F": 0.6892155485162732,
      "MoBERT-N": 0.720722632732034,
      "MoBERT-min(F/N)": 0.6892155485162732,
      "MoBERT-max(F/N)": 0.720722632732034,
      "MotionCritic": -15.375870704650879,
      "VeMo (human-opt view)": 0.9241379310344827,
      "VeMo (max entropy view)": 0.9152,
      "VeMo (min entropy view)": 0.9241379310344827,
      "VeMo (random view)": 0.9152,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in a semi - circular shape while swinging their arms slightly."
  },
  "006836": {
    "text": "the person turns on the faucet to wash the dishes",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.44158005125892885,
      "Minus Multimodal Distance": -4.770894527435303,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6630716092768125e-05,
      "MoBERT-F": 0.35719571116637816,
      "MoBERT-N": 0.5073650105174368,
      "MoBERT-min(F/N)": 0.35719571116637816,
      "MoBERT-max(F/N)": 0.5073650105174368,
      "MotionCritic": -3.9834160804748535,
      "VeMo (human-opt view)": 1.2273223177285383e-05,
      "VeMo (max entropy view)": 2.2828519847268923e-05,
      "VeMo (min entropy view)": 1.2273223177285383e-05,
      "VeMo (random view)": 1.2273223177285383e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person turns on the faucet to wash the dishes."
  },
  "012584": {
    "text": "a person reaches his left arm straight out",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.16161387093346,
      "Minus Multimodal Distance": -4.012157440185547,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.2792824388016015e-05,
      "MoBERT-F": 0.28918527219493667,
      "MoBERT-N": 0.4159896681943208,
      "MoBERT-min(F/N)": 0.28918527219493667,
      "MoBERT-max(F/N)": 0.4159896681943208,
      "MotionCritic": -6.0823163986206055,
      "VeMo (human-opt view)": 0.9496204278812974,
      "VeMo (max entropy view)": 0.9496204278812974,
      "VeMo (min entropy view)": 0.9578050301392642,
      "VeMo (random view)": 0.9578050301392642,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person reaches his left arm straight out."
  },
  "006281": {
    "text": "a person rolls his right ankle while walking",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8723852468286409,
      "Minus Multimodal Distance": -3.713819742202759,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9196138381958008,
      "MoBERT-F": 0.627208982378668,
      "MoBERT-N": 0.5446388218245839,
      "MoBERT-min(F/N)": 0.5446388218245839,
      "MoBERT-max(F/N)": 0.627208982378668,
      "MotionCritic": -6.787796497344971,
      "VeMo (human-opt view)": 0.6518324607329843,
      "VeMo (max entropy view)": 0.5614617940199336,
      "VeMo (min entropy view)": 0.6518324607329843,
      "VeMo (random view)": 0.5614617940199336,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person rolls their right ankle while walking."
  },
  "008131": {
    "text": "moving there hands and do like a exericses.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4831719647968008,
      "Minus Multimodal Distance": -4.284007549285889,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.3158685255330056e-05,
      "MoBERT-F": 0.33685132294764375,
      "MoBERT-N": 0.3981328086711015,
      "MoBERT-min(F/N)": 0.33685132294764375,
      "MoBERT-max(F/N)": 0.3981328086711015,
      "MotionCritic": -6.59347677230835,
      "VeMo (human-opt view)": 0.9706693406868889,
      "VeMo (max entropy view)": 0.9553708833487227,
      "VeMo (min entropy view)": 0.9706693406868889,
      "VeMo (random view)": 0.9706693406868889,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving their hands and doing something like exercises."
  },
  "007806": {
    "text": "a person walking slowly across something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5388362378017649,
      "Minus Multimodal Distance": -8.77561092376709,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.274650614708662e-05,
      "MoBERT-F": 0.3347100981521488,
      "MoBERT-N": 0.4357014780937624,
      "MoBERT-min(F/N)": 0.3347100981521488,
      "MoBERT-max(F/N)": 0.4357014780937624,
      "MotionCritic": -4.204151153564453,
      "VeMo (human-opt view)": 0.9755035383777899,
      "VeMo (max entropy view)": 0.9755035383777899,
      "VeMo (min entropy view)": 0.9808623838640514,
      "VeMo (random view)": 0.9755035383777899,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking slowly across something."
  },
  "004206": {
    "text": "a person stretches their leg by lunging back and forth on it, stretches their arms one at a time by lifting them and bending at the waist in the opposite direction, then finally walks forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6336736709584618,
      "Minus Multimodal Distance": -5.564632892608643,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.010800950229167938,
      "MoBERT-F": 0.5087262326589302,
      "MoBERT-N": 0.5194666105799379,
      "MoBERT-min(F/N)": 0.5087262326589302,
      "MoBERT-max(F/N)": 0.5194666105799379,
      "MotionCritic": -6.996235370635986,
      "VeMo (human-opt view)": 0.8266405484818805,
      "VeMo (max entropy view)": 0.8266405484818805,
      "VeMo (min entropy view)": 0.8358714043993232,
      "VeMo (random view)": 0.8358714043993232,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stretches their leg by lunging back and forth. They stretch their arms one at a time by lifting them and bending at the waist in the opposite direction. Then, finally, they walk forward."
  },
  "013475": {
    "text": "the person balances on their right toe, then switches to the left and spins in a circle",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0183371651893758,
      "Minus Multimodal Distance": -8.013790130615234,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.022343939170241356,
      "MoBERT-F": 0.6169943832453735,
      "MoBERT-N": 0.5536944304212136,
      "MoBERT-min(F/N)": 0.5536944304212136,
      "MoBERT-max(F/N)": 0.6169943832453735,
      "MotionCritic": -6.855922698974609,
      "VeMo (human-opt view)": 0.5309734513274337,
      "VeMo (max entropy view)": 0.5309734513274337,
      "VeMo (min entropy view)": 0.5473251028806584,
      "VeMo (random view)": 0.5309734513274337,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person balances on their right toe, then switches to the left and spins in a circle."
  },
  "004601": {
    "text": "someone is climbing a ladder,  they walk up 3 steps and then back down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2289876462459328,
      "Minus Multimodal Distance": -9.616764068603516,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00023692082322668284,
      "MoBERT-F": 0.5772980676000453,
      "MoBERT-N": 0.5639051716669075,
      "MoBERT-min(F/N)": 0.5639051716669075,
      "MoBERT-max(F/N)": 0.5772980676000453,
      "MotionCritic": -6.537121295928955,
      "VeMo (human-opt view)": 4.3888523151195965e-05,
      "VeMo (max entropy view)": 5.313723769561715e-05,
      "VeMo (min entropy view)": 4.3888523151195965e-05,
      "VeMo (random view)": 4.3888523151195965e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is climbing a ladder. They walk up 3 steps and then walk back down."
  },
  "002899": {
    "text": "a person is leaning forward and making rapid movements with their right arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7457119627551098,
      "Minus Multimodal Distance": -2.3899929523468018,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6961277399095707e-05,
      "MoBERT-F": 0.30234824688693135,
      "MoBERT-N": 0.3914113841803219,
      "MoBERT-min(F/N)": 0.30234824688693135,
      "MoBERT-max(F/N)": 0.3914113841803219,
      "MotionCritic": -2.7119274139404297,
      "VeMo (human-opt view)": 4.1491476713778674e-05,
      "VeMo (max entropy view)": 0.0001539571026893875,
      "VeMo (min entropy view)": 4.1491476713778674e-05,
      "VeMo (random view)": 4.1491476713778674e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is leaning forward and making rapid movements with their right arm."
  },
  "007626": {
    "text": "a  man stands on the ground ,walks anticlockwise and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7369367327418321,
      "Minus Multimodal Distance": -11.587871551513672,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5138338804244995,
      "MoBERT-F": 0.579022468106602,
      "MoBERT-N": 0.6370720494488058,
      "MoBERT-min(F/N)": 0.579022468106602,
      "MoBERT-max(F/N)": 0.6370720494488058,
      "MotionCritic": -12.662727355957031,
      "VeMo (human-opt view)": 0.9150661545027742,
      "VeMo (max entropy view)": 0.880648899188876,
      "VeMo (min entropy view)": 0.9150661545027742,
      "VeMo (random view)": 0.880648899188876,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man stands on the ground, walks anticlockwise, and then stops."
  },
  "007020": {
    "text": "a man walks unsteadily forward then turns around and walks back",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5784992641955209,
      "Minus Multimodal Distance": -9.191116333007812,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00012111486284993589,
      "MoBERT-F": 0.48155147052476743,
      "MoBERT-N": 0.5676818167149493,
      "MoBERT-min(F/N)": 0.48155147052476743,
      "MoBERT-max(F/N)": 0.5676818167149493,
      "MotionCritic": -15.349897384643555,
      "VeMo (human-opt view)": 0.9360824742268041,
      "VeMo (max entropy view)": 0.7427385892116183,
      "VeMo (min entropy view)": 0.9360824742268041,
      "VeMo (random view)": 0.7427385892116183,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks unsteadily forward, then turns around and walks back."
  },
  "001648": {
    "text": "a man uses his right hand to throw somthing with force.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5796348886587506,
      "Minus Multimodal Distance": -5.183475971221924,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0009141258196905255,
      "MoBERT-F": 0.5324480264157582,
      "MoBERT-N": 0.48316631517570785,
      "MoBERT-min(F/N)": 0.48316631517570785,
      "MoBERT-max(F/N)": 0.5324480264157582,
      "MotionCritic": -11.36601734161377,
      "VeMo (human-opt view)": 0.8178438661710037,
      "VeMo (max entropy view)": 0.8178438661710037,
      "VeMo (min entropy view)": 0.8436363636363636,
      "VeMo (random view)": 0.8436363636363636,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man uses his right hand to throw something with force."
  },
  "011594": {
    "text": "a person walks forward while being assisted by hand rails.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5049220030707324,
      "Minus Multimodal Distance": -6.502047061920166,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.57660828740336e-05,
      "MoBERT-F": 0.37193621849280106,
      "MoBERT-N": 0.46016507950618846,
      "MoBERT-min(F/N)": 0.37193621849280106,
      "MoBERT-max(F/N)": 0.46016507950618846,
      "MotionCritic": -10.278453826904297,
      "VeMo (human-opt view)": 0.0004045956933036202,
      "VeMo (max entropy view)": 0.0007087525598789066,
      "VeMo (min entropy view)": 0.0004045956933036202,
      "VeMo (random view)": 0.0007087525598789066,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward while being assisted by handrails."
  },
  "007630": {
    "text": "person is holding his head because he feels dizzy.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2856176636297547,
      "Minus Multimodal Distance": -10.529038429260254,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.10862264491152e-05,
      "MoBERT-F": 0.3051686481343927,
      "MoBERT-N": 0.3602894659960723,
      "MoBERT-min(F/N)": 0.3051686481343927,
      "MoBERT-max(F/N)": 0.3602894659960723,
      "MotionCritic": -2.3228230476379395,
      "VeMo (human-opt view)": 0.9323583180987203,
      "VeMo (max entropy view)": 0.8177339901477833,
      "VeMo (min entropy view)": 0.9323583180987203,
      "VeMo (random view)": 0.9323583180987203,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is holding his head because he feels dizzy."
  },
  "006163": {
    "text": "man moves his right hand to cover his groin.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1813582488258171,
      "Minus Multimodal Distance": -10.1563138961792,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.8689347497420385e-05,
      "MoBERT-F": 0.309686077413978,
      "MoBERT-N": 0.3822800141539714,
      "MoBERT-min(F/N)": 0.309686077413978,
      "MoBERT-max(F/N)": 0.3822800141539714,
      "MotionCritic": -5.922731399536133,
      "VeMo (human-opt view)": 0.6511627906976745,
      "VeMo (max entropy view)": 0.6511627906976745,
      "VeMo (min entropy view)": 0.30612244897959184,
      "VeMo (random view)": 0.6511627906976745,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man moves his right hand to cover his groin."
  },
  "004163": {
    "text": "he is stretching his arm then moving it",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9096308629664205,
      "Minus Multimodal Distance": -3.9214940071105957,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.3675074013881385e-05,
      "MoBERT-F": 0.27889934942078987,
      "MoBERT-N": 0.3921768584915303,
      "MoBERT-min(F/N)": 0.27889934942078987,
      "MoBERT-max(F/N)": 0.3921768584915303,
      "MotionCritic": -4.799291610717773,
      "VeMo (human-opt view)": 0.9890085972358255,
      "VeMo (max entropy view)": 0.986784140969163,
      "VeMo (min entropy view)": 0.9890085972358255,
      "VeMo (random view)": 0.9890085972358255,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He is stretching his arm and then moving it."
  },
  "003677": {
    "text": "person stretches arms out and makes arm circles.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3131774464604974,
      "Minus Multimodal Distance": -5.782185077667236,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.536724398145452e-05,
      "MoBERT-F": 0.4272436027206488,
      "MoBERT-N": 0.4172681188492162,
      "MoBERT-min(F/N)": 0.4172681188492162,
      "MoBERT-max(F/N)": 0.4272436027206488,
      "MotionCritic": -12.095237731933594,
      "VeMo (human-opt view)": 0.8989399293286219,
      "VeMo (max entropy view)": 0.8989399293286219,
      "VeMo (min entropy view)": 0.9194936708860759,
      "VeMo (random view)": 0.8989399293286219,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stretches their arms out and makes arm circles."
  },
  "013444": {
    "text": "a person bends over and flaps his arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6219735284411921,
      "Minus Multimodal Distance": -6.092981338500977,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0004557542852126062,
      "MoBERT-F": 0.4428466384813145,
      "MoBERT-N": 0.4830569672979973,
      "MoBERT-min(F/N)": 0.4428466384813145,
      "MoBERT-max(F/N)": 0.4830569672979973,
      "MotionCritic": -8.912139892578125,
      "VeMo (human-opt view)": 0.453551912568306,
      "VeMo (max entropy view)": 0.453551912568306,
      "VeMo (min entropy view)": 0.7666034155597723,
      "VeMo (random view)": 0.453551912568306,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends over and flaps his arms."
  },
  "001052": {
    "text": "a person who performed a left handed uppercut",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8474209469410603,
      "Minus Multimodal Distance": -10.012581825256348,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.450966323725879e-05,
      "MoBERT-F": 0.3648147729676202,
      "MoBERT-N": 0.44443964191552743,
      "MoBERT-min(F/N)": 0.3648147729676202,
      "MoBERT-max(F/N)": 0.44443964191552743,
      "MotionCritic": -1.372635841369629,
      "VeMo (human-opt view)": 0.39226519337016574,
      "VeMo (max entropy view)": 0.39226519337016574,
      "VeMo (min entropy view)": 0.37799043062200954,
      "VeMo (random view)": 0.37799043062200954,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who performed a left - handed uppercut."
  },
  "010553": {
    "text": "a person is walking slowly in zigzag",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8454513838924789,
      "Minus Multimodal Distance": -4.620778560638428,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6391699066152796e-05,
      "MoBERT-F": 0.5041478037480973,
      "MoBERT-N": 0.6141601639979009,
      "MoBERT-min(F/N)": 0.5041478037480973,
      "MoBERT-max(F/N)": 0.6141601639979009,
      "MotionCritic": -2.085526943206787,
      "VeMo (human-opt view)": 0.5920245398773006,
      "VeMo (max entropy view)": 0.5920245398773006,
      "VeMo (min entropy view)": 0.7554179566563467,
      "VeMo (random view)": 0.5920245398773006,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking slowly in a zigzag."
  },
  "013371": {
    "text": "a person swings something with both hands",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.917581144543978,
      "Minus Multimodal Distance": -6.413411617279053,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00017385966202709824,
      "MoBERT-F": 0.4970582500651338,
      "MoBERT-N": 0.5112480277852783,
      "MoBERT-min(F/N)": 0.4970582500651338,
      "MoBERT-max(F/N)": 0.5112480277852783,
      "MotionCritic": -10.429473876953125,
      "VeMo (human-opt view)": 0.6367713004484304,
      "VeMo (max entropy view)": 0.5770308123249299,
      "VeMo (min entropy view)": 0.6367713004484304,
      "VeMo (random view)": 0.5770308123249299,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person swings something with both hands."
  },
  "003193": {
    "text": "a person runs back and forth",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.013322248067352,
      "Minus Multimodal Distance": -11.585679054260254,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9771196246147156,
      "MoBERT-F": 0.7447447801564555,
      "MoBERT-N": 0.7177551300281152,
      "MoBERT-min(F/N)": 0.7177551300281152,
      "MoBERT-max(F/N)": 0.7447447801564555,
      "MotionCritic": -9.035431861877441,
      "VeMo (human-opt view)": 0.9323692045937899,
      "VeMo (max entropy view)": 0.9323692045937899,
      "VeMo (min entropy view)": 0.9433419614610297,
      "VeMo (random view)": 0.9433419614610297,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs back and forth."
  },
  "001882": {
    "text": "a person walks forward then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.36517978617818875,
      "Minus Multimodal Distance": -12.346088409423828,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6882562451646663e-05,
      "MoBERT-F": 0.36460103325972537,
      "MoBERT-N": 0.4927384607137959,
      "MoBERT-min(F/N)": 0.36460103325972537,
      "MoBERT-max(F/N)": 0.4927384607137959,
      "MotionCritic": -4.084526538848877,
      "VeMo (human-opt view)": 0.9968184624151194,
      "VeMo (max entropy view)": 0.9809523809523809,
      "VeMo (min entropy view)": 0.9968184624151194,
      "VeMo (random view)": 0.9809523809523809,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and then stops."
  },
  "003603": {
    "text": "person kneeling down qith stool",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5782139815613763,
      "Minus Multimodal Distance": -3.908712387084961,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.37217748165130615,
      "MoBERT-F": 0.48991651516756873,
      "MoBERT-N": 0.5396631011722879,
      "MoBERT-min(F/N)": 0.48991651516756873,
      "MoBERT-max(F/N)": 0.5396631011722879,
      "MotionCritic": -8.479358673095703,
      "VeMo (human-opt view)": 0.004315239609357257,
      "VeMo (max entropy view)": 0.004315239609357257,
      "VeMo (min entropy view)": 0.002176479792878786,
      "VeMo (random view)": 0.002176479792878786,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "person kneeling down with stool"
  },
  "012561": {
    "text": "peron moves forward with arms shoulder height then fully turns arms to the back and moves further forward then turns to side and moves back quickly",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9765021576393121,
      "Minus Multimodal Distance": -3.319870710372925,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0011984205339103937,
      "MoBERT-F": 0.5272724360889343,
      "MoBERT-N": 0.5721509920889293,
      "MoBERT-min(F/N)": 0.5272724360889343,
      "MoBERT-max(F/N)": 0.5721509920889293,
      "MotionCritic": -13.408488273620605,
      "VeMo (human-opt view)": 0.6797202797202797,
      "VeMo (max entropy view)": 0.6378205128205128,
      "VeMo (min entropy view)": 0.6797202797202797,
      "VeMo (random view)": 0.6378205128205128,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves forward with their arms at shoulder height. Then, they fully turn their arms to the back and move further forward. After that, they turn to the side and move back quickly."
  },
  "001755": {
    "text": "person moves backwards towards the left then right as though he is playing a sport game like football or baseketball",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7706651399555552,
      "Minus Multimodal Distance": -9.725861549377441,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9975812435150146,
      "MoBERT-F": 0.6391381310870957,
      "MoBERT-N": 0.5356961936722613,
      "MoBERT-min(F/N)": 0.5356961936722613,
      "MoBERT-max(F/N)": 0.6391381310870957,
      "MotionCritic": -4.146007537841797,
      "VeMo (human-opt view)": 0.7880386983289358,
      "VeMo (max entropy view)": 0.7880386983289358,
      "VeMo (min entropy view)": 0.8518189884649512,
      "VeMo (random view)": 0.8518189884649512,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves backwards, first towards the left and then to the right as though he is playing a sport such as football or basketball."
  },
  "001298": {
    "text": "someone stands with their hands on their upper legs spread apart, then straightens up and outstretches both arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.17351677799447207,
      "Minus Multimodal Distance": -10.380207061767578,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0004013408033642918,
      "MoBERT-F": 0.46798415262123405,
      "MoBERT-N": 0.40732898810647045,
      "MoBERT-min(F/N)": 0.40732898810647045,
      "MoBERT-max(F/N)": 0.46798415262123405,
      "MotionCritic": -16.333293914794922,
      "VeMo (human-opt view)": 0.9950794257083155,
      "VeMo (max entropy view)": 0.9467994310099573,
      "VeMo (min entropy view)": 0.9950794257083155,
      "VeMo (random view)": 0.9467994310099573,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone stands with their hands on their upper legs, legs spread apart, then straightens up and outstretches both arms."
  },
  "012020": {
    "text": "a person, standing still with their arms at their sides, seems to wince/twitch, then continues standing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.09460909514590106,
      "Minus Multimodal Distance": -5.562114238739014,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.485217868932523e-05,
      "MoBERT-F": 0.270297845985194,
      "MoBERT-N": 0.3906416233567973,
      "MoBERT-min(F/N)": 0.270297845985194,
      "MoBERT-max(F/N)": 0.3906416233567973,
      "MotionCritic": -1.386323094367981,
      "VeMo (human-opt view)": 0.4084507042253521,
      "VeMo (max entropy view)": 0.4084507042253521,
      "VeMo (min entropy view)": 0.37752161383285304,
      "VeMo (random view)": 0.37752161383285304,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person, standing still with their arms at their sides, seems to wince or twitch, then continues standing."
  },
  "010897": {
    "text": "a person puts their hands on their hips and turns to the left side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4861186938810204,
      "Minus Multimodal Distance": -5.450695514678955,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8487986128311604e-05,
      "MoBERT-F": 0.31233719446817465,
      "MoBERT-N": 0.3791947693786682,
      "MoBERT-min(F/N)": 0.31233719446817465,
      "MoBERT-max(F/N)": 0.3791947693786682,
      "MotionCritic": -1.0063965320587158,
      "VeMo (human-opt view)": 0.9648757016840417,
      "VeMo (max entropy view)": 0.9322381930184805,
      "VeMo (min entropy view)": 0.9648757016840417,
      "VeMo (random view)": 0.9648757016840417,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person puts their hands on their hips and turns to the left side."
  },
  "003823": {
    "text": "person scatches head and armpit like a monkey then pretends to hold a baby",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.572425211585676,
      "Minus Multimodal Distance": -6.4989423751831055,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0003022993914783001,
      "MoBERT-F": 0.45200573985269216,
      "MoBERT-N": 0.5019565608162218,
      "MoBERT-min(F/N)": 0.45200573985269216,
      "MoBERT-max(F/N)": 0.5019565608162218,
      "MotionCritic": -2.547605037689209,
      "VeMo (human-opt view)": 0.060240963855421686,
      "VeMo (max entropy view)": 0.060240963855421686,
      "VeMo (min entropy view)": 0.015846225972808253,
      "VeMo (random view)": 0.060240963855421686,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person scratches their head and armpit like a monkey, then pretends to hold a baby."
  },
  "009443": {
    "text": "a person attempts to get a rock out of their shoe.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7214050623658614,
      "Minus Multimodal Distance": -10.25155258178711,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.22978317737579346,
      "MoBERT-F": 0.6033801135946935,
      "MoBERT-N": 0.5829036478857816,
      "MoBERT-min(F/N)": 0.5829036478857816,
      "MoBERT-max(F/N)": 0.6033801135946935,
      "MotionCritic": -5.378231525421143,
      "VeMo (human-opt view)": 0.7661870503597122,
      "VeMo (max entropy view)": 0.37777777777777777,
      "VeMo (min entropy view)": 0.7661870503597122,
      "VeMo (random view)": 0.7661870503597122,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person attempts to get a rock out of their shoe."
  },
  "006518": {
    "text": "a person boastfully meanders across a room in a confident manner.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8490999390563461,
      "Minus Multimodal Distance": -10.380035400390625,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00014649273362010717,
      "MoBERT-F": 0.543624327091863,
      "MoBERT-N": 0.5167277040676657,
      "MoBERT-min(F/N)": 0.5167277040676657,
      "MoBERT-max(F/N)": 0.543624327091863,
      "MotionCritic": -5.595153331756592,
      "VeMo (human-opt view)": 0.8267716535433071,
      "VeMo (max entropy view)": 0.6651480637813212,
      "VeMo (min entropy view)": 0.8267716535433071,
      "VeMo (random view)": 0.6651480637813212,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person boastfully meanders across a room in a confident manner."
  },
  "013777": {
    "text": "a man sits down with crossed legs then gets up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5766861604869218,
      "Minus Multimodal Distance": -9.751337051391602,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.29477208852767944,
      "MoBERT-F": 0.5041561172983473,
      "MoBERT-N": 0.5353406310591953,
      "MoBERT-min(F/N)": 0.5041561172983473,
      "MoBERT-max(F/N)": 0.5353406310591953,
      "MotionCritic": -4.875979900360107,
      "VeMo (human-opt view)": 0.993704537299801,
      "VeMo (max entropy view)": 0.910158013544018,
      "VeMo (min entropy view)": 0.993704537299801,
      "VeMo (random view)": 0.993704537299801,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man sits down with crossed legs and then gets up."
  },
  "001640": {
    "text": "person looks to be washing a window with both hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.28622075836129823,
      "Minus Multimodal Distance": -7.540012359619141,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.996120358351618e-05,
      "MoBERT-F": 0.31289430080405145,
      "MoBERT-N": 0.37702905617039584,
      "MoBERT-min(F/N)": 0.31289430080405145,
      "MoBERT-max(F/N)": 0.37702905617039584,
      "MotionCritic": -8.699910163879395,
      "VeMo (human-opt view)": 0.7872340425531915,
      "VeMo (max entropy view)": 0.6926070038910506,
      "VeMo (min entropy view)": 0.7872340425531915,
      "VeMo (random view)": 0.7872340425531915,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person looks to be washing a window with both hands."
  },
  "010909": {
    "text": "a person walks forward and then walks up some steps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4498038627592859,
      "Minus Multimodal Distance": -5.236813545227051,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.614794357214123e-05,
      "MoBERT-F": 0.5103030834374952,
      "MoBERT-N": 0.5717841962559994,
      "MoBERT-min(F/N)": 0.5103030834374952,
      "MoBERT-max(F/N)": 0.5717841962559994,
      "MotionCritic": -6.158167362213135,
      "VeMo (human-opt view)": 0.256198347107438,
      "VeMo (max entropy view)": 0.256198347107438,
      "VeMo (min entropy view)": 0.19242902208201892,
      "VeMo (random view)": 0.19242902208201892,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and then walks up some steps."
  },
  "006610": {
    "text": "a person stomps the ground with their left foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.20798793234759644,
      "Minus Multimodal Distance": -3.4197256565093994,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.1099127227207646e-05,
      "MoBERT-F": 0.30833009247679294,
      "MoBERT-N": 0.4616882614674657,
      "MoBERT-min(F/N)": 0.30833009247679294,
      "MoBERT-max(F/N)": 0.4616882614674657,
      "MotionCritic": -4.881282329559326,
      "VeMo (human-opt view)": 0.578088578088578,
      "VeMo (max entropy view)": 0.578088578088578,
      "VeMo (min entropy view)": 0.19236016371077763,
      "VeMo (random view)": 0.19236016371077763,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stomps the ground with their left foot."
  },
  "013068": {
    "text": "the person is walking but struggling with the knee",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9360674795578071,
      "Minus Multimodal Distance": -4.7714643478393555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4131244572345167e-05,
      "MoBERT-F": 0.42485483473516483,
      "MoBERT-N": 0.5936818321395227,
      "MoBERT-min(F/N)": 0.42485483473516483,
      "MoBERT-max(F/N)": 0.5936818321395227,
      "MotionCritic": -10.597799301147461,
      "VeMo (human-opt view)": 0.9195402298850575,
      "VeMo (max entropy view)": 0.8174672489082969,
      "VeMo (min entropy view)": 0.9195402298850575,
      "VeMo (random view)": 0.9195402298850575,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking but struggling with their knee."
  },
  "005278": {
    "text": "the man walks while holding onto the rail.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6036823699326548,
      "Minus Multimodal Distance": -5.153619766235352,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.5156677515478805e-05,
      "MoBERT-F": 0.27367696948656617,
      "MoBERT-N": 0.41951982341008526,
      "MoBERT-min(F/N)": 0.27367696948656617,
      "MoBERT-max(F/N)": 0.41951982341008526,
      "MotionCritic": -11.448518753051758,
      "VeMo (human-opt view)": 3.42641768031523e-05,
      "VeMo (max entropy view)": 3.42641768031523e-05,
      "VeMo (min entropy view)": 2.75949208464044e-05,
      "VeMo (random view)": 2.75949208464044e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man walks while holding onto the rail."
  },
  "002761": {
    "text": "a person walks turning to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.19365917199228538,
      "Minus Multimodal Distance": -6.093698024749756,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.50960420089541e-05,
      "MoBERT-F": 0.40083278119560706,
      "MoBERT-N": 0.5517992743499065,
      "MoBERT-min(F/N)": 0.40083278119560706,
      "MoBERT-max(F/N)": 0.5517992743499065,
      "MotionCritic": -8.860527992248535,
      "VeMo (human-opt view)": 0.955421936554012,
      "VeMo (max entropy view)": 0.955421936554012,
      "VeMo (min entropy view)": 0.97556434721899,
      "VeMo (random view)": 0.955421936554012,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks while turning to the left."
  },
  "005077": {
    "text": "a person sidesteps to their left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.45814659560317117,
      "Minus Multimodal Distance": -5.739118576049805,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.284225699142553e-05,
      "MoBERT-F": 0.4616261170259034,
      "MoBERT-N": 0.5984938473720294,
      "MoBERT-min(F/N)": 0.4616261170259034,
      "MoBERT-max(F/N)": 0.5984938473720294,
      "MotionCritic": -3.5424795150756836,
      "VeMo (human-opt view)": 0.9525909592061742,
      "VeMo (max entropy view)": 0.9195402298850575,
      "VeMo (min entropy view)": 0.9525909592061742,
      "VeMo (random view)": 0.9525909592061742,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sidesteps to their left."
  },
  "009890": {
    "text": "moving hands to knees.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7306593454913334,
      "Minus Multimodal Distance": -11.901155471801758,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.460879815975204e-05,
      "MoBERT-F": 0.40261519115258837,
      "MoBERT-N": 0.45480073801382237,
      "MoBERT-min(F/N)": 0.40261519115258837,
      "MoBERT-max(F/N)": 0.45480073801382237,
      "MotionCritic": -9.841093063354492,
      "VeMo (human-opt view)": 0.8813114754098361,
      "VeMo (max entropy view)": 0.8813114754098361,
      "VeMo (min entropy view)": 0.9467455621301775,
      "VeMo (random view)": 0.8813114754098361,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves hands to knees."
  },
  "009385": {
    "text": "person squats all the way to the ground, then leaps up all the way, raising both hands above the head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.773412173557227,
      "Minus Multimodal Distance": -2.7681210041046143,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9714211225509644,
      "MoBERT-F": 0.7197513587034434,
      "MoBERT-N": 0.6770847749652863,
      "MoBERT-min(F/N)": 0.6770847749652863,
      "MoBERT-max(F/N)": 0.7197513587034434,
      "MotionCritic": -9.018156051635742,
      "VeMo (human-opt view)": 0.778021978021978,
      "VeMo (max entropy view)": 0.778021978021978,
      "VeMo (min entropy view)": 0.8266666666666667,
      "VeMo (random view)": 0.8266666666666667,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person squats all the way to the ground, then leaps up all the way, raising both hands above the head."
  },
  "000609": {
    "text": "a person slowly walks in a circle pattern as though they are ballroom dancing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1231681969085243,
      "Minus Multimodal Distance": -11.485304832458496,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005354920867830515,
      "MoBERT-F": 0.531765569554524,
      "MoBERT-N": 0.4683235311835129,
      "MoBERT-min(F/N)": 0.4683235311835129,
      "MoBERT-max(F/N)": 0.531765569554524,
      "MotionCritic": -13.405717849731445,
      "VeMo (human-opt view)": 0.6916299559471366,
      "VeMo (max entropy view)": 0.6218274111675127,
      "VeMo (min entropy view)": 0.6916299559471366,
      "VeMo (random view)": 0.6218274111675127,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly walks in a circular pattern as though they are ballroom dancing."
  },
  "003105": {
    "text": "a person standing up and greeting someone with a respectful hand to forehead gesture.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.22502874942592888,
      "Minus Multimodal Distance": -3.7684357166290283,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.003565845778212e-05,
      "MoBERT-F": 0.2730785230400779,
      "MoBERT-N": 0.3645885768202341,
      "MoBERT-min(F/N)": 0.2730785230400779,
      "MoBERT-max(F/N)": 0.3645885768202341,
      "MotionCritic": -5.463443756103516,
      "VeMo (human-opt view)": 0.9840476648087642,
      "VeMo (max entropy view)": 0.9840476648087642,
      "VeMo (min entropy view)": 0.00299923899905994,
      "VeMo (random view)": 0.00299923899905994,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands up and greets someone with a respectful hand - to - forehead gesture."
  },
  "012310": {
    "text": "the toon walks forward a few steps, then turns around walking back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.552121097677221,
      "Minus Multimodal Distance": -4.630958557128906,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2690132027491927e-05,
      "MoBERT-F": 0.36270197287576583,
      "MoBERT-N": 0.4990529378595478,
      "MoBERT-min(F/N)": 0.36270197287576583,
      "MoBERT-max(F/N)": 0.4990529378595478,
      "MotionCritic": -15.507054328918457,
      "VeMo (human-opt view)": 0.9919465126880413,
      "VeMo (max entropy view)": 0.9325899645210339,
      "VeMo (min entropy view)": 0.9919465126880413,
      "VeMo (random view)": 0.9919465126880413,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The toon walks forward a few steps, then turns around and walks back."
  },
  "001171": {
    "text": "the person is getting a shower and washing his right arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5198106793704491,
      "Minus Multimodal Distance": -6.723852634429932,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6148240067414008e-05,
      "MoBERT-F": 0.3136443160945756,
      "MoBERT-N": 0.3918044351301942,
      "MoBERT-min(F/N)": 0.3136443160945756,
      "MoBERT-max(F/N)": 0.3918044351301942,
      "MotionCritic": -9.091154098510742,
      "VeMo (human-opt view)": 0.000277922281528145,
      "VeMo (max entropy view)": 0.0007110596750702477,
      "VeMo (min entropy view)": 0.000277922281528145,
      "VeMo (random view)": 0.0007110596750702477,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is taking a shower and washing his right arm."
  },
  "010072": {
    "text": "a figure walks down and right to stare at a wall",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5764306908531204,
      "Minus Multimodal Distance": -10.163799285888672,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.04532472405117e-05,
      "MoBERT-F": 0.33710513493755434,
      "MoBERT-N": 0.5149118278693181,
      "MoBERT-min(F/N)": 0.33710513493755434,
      "MoBERT-max(F/N)": 0.5149118278693181,
      "MotionCritic": -1.1253348588943481,
      "VeMo (human-opt view)": 0.9197080291970803,
      "VeMo (max entropy view)": 0.9197080291970803,
      "VeMo (min entropy view)": 0.946712802768166,
      "VeMo (random view)": 0.9197080291970803,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure walks down and to the right to stare at a wall."
  },
  "009954": {
    "text": "a person slowly walks forward with hands down at sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.24472662947820642,
      "Minus Multimodal Distance": -10.086492538452148,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.282201552996412e-05,
      "MoBERT-F": 0.34687860941145143,
      "MoBERT-N": 0.4626634897208725,
      "MoBERT-min(F/N)": 0.34687860941145143,
      "MoBERT-max(F/N)": 0.4626634897208725,
      "MotionCritic": -4.314567565917969,
      "VeMo (human-opt view)": 0.9890404478435701,
      "VeMo (max entropy view)": 0.9875761652290892,
      "VeMo (min entropy view)": 0.9890404478435701,
      "VeMo (random view)": 0.9875761652290892,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly walks forward with hands down at the sides."
  },
  "011991": {
    "text": "a person appears to be pushing against a wall.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0583247889884477,
      "Minus Multimodal Distance": -11.389978408813477,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3218699425342493e-05,
      "MoBERT-F": 0.4391947528213749,
      "MoBERT-N": 0.49073943698643147,
      "MoBERT-min(F/N)": 0.4391947528213749,
      "MoBERT-max(F/N)": 0.49073943698643147,
      "MotionCritic": -3.0049381256103516,
      "VeMo (human-opt view)": 0.0001493771700677298,
      "VeMo (max entropy view)": 0.011676082862523541,
      "VeMo (min entropy view)": 0.0001493771700677298,
      "VeMo (random view)": 0.0001493771700677298,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person appears to be pushing against a wall."
  },
  "006912": {
    "text": "a person walks, stepping onto a big object.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9308911049493945,
      "Minus Multimodal Distance": -6.449612617492676,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.5565783036872745e-05,
      "MoBERT-F": 0.39944817840457153,
      "MoBERT-N": 0.5759852586311669,
      "MoBERT-min(F/N)": 0.39944817840457153,
      "MoBERT-max(F/N)": 0.5759852586311669,
      "MotionCritic": -1.3969087600708008,
      "VeMo (human-opt view)": 0.1482889733840304,
      "VeMo (max entropy view)": 0.2019115890083632,
      "VeMo (min entropy view)": 0.1482889733840304,
      "VeMo (random view)": 0.1482889733840304,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks and steps onto a big object."
  },
  "011363": {
    "text": "the man is running and scrambling",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.5304328267098264,
      "Minus Multimodal Distance": -7.390950679779053,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.20790167152881622,
      "MoBERT-F": 0.6815596926028251,
      "MoBERT-N": 0.7327841018156294,
      "MoBERT-min(F/N)": 0.6815596926028251,
      "MoBERT-max(F/N)": 0.7327841018156294,
      "MotionCritic": -12.427422523498535,
      "VeMo (human-opt view)": 0.7054108216432866,
      "VeMo (max entropy view)": 0.7054108216432866,
      "VeMo (min entropy view)": 0.7431906614785992,
      "VeMo (random view)": 0.7054108216432866,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is running and scrambling."
  },
  "007445": {
    "text": "a person does arm curls",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.450443693112749,
      "Minus Multimodal Distance": -2.9303066730499268,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.371627306274604e-05,
      "MoBERT-F": 0.42478895089136615,
      "MoBERT-N": 0.4807128168287264,
      "MoBERT-min(F/N)": 0.42478895089136615,
      "MoBERT-max(F/N)": 0.4807128168287264,
      "MotionCritic": -6.681692600250244,
      "VeMo (human-opt view)": 0.0007087525598789066,
      "VeMo (max entropy view)": 0.0007087525598789066,
      "VeMo (min entropy view)": 0.0002459726705442051,
      "VeMo (random view)": 0.0007087525598789066,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does arm curls."
  },
  "002842": {
    "text": "a person walks forward 4 steps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.19063330577001758,
      "Minus Multimodal Distance": -10.637959480285645,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.323927947145421e-05,
      "MoBERT-F": 0.3603046623126772,
      "MoBERT-N": 0.5210635560056802,
      "MoBERT-min(F/N)": 0.3603046623126772,
      "MoBERT-max(F/N)": 0.5210635560056802,
      "MotionCritic": -3.937952995300293,
      "VeMo (human-opt view)": 0.9649368863955119,
      "VeMo (max entropy view)": 0.9527145359019265,
      "VeMo (min entropy view)": 0.9649368863955119,
      "VeMo (random view)": 0.9527145359019265,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward 4 steps."
  },
  "002897": {
    "text": "a person jauntily skips forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.715936536591201,
      "Minus Multimodal Distance": -12.237165451049805,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9944719076156616,
      "MoBERT-F": 0.8114845342566561,
      "MoBERT-N": 0.718942665137724,
      "MoBERT-min(F/N)": 0.718942665137724,
      "MoBERT-max(F/N)": 0.8114845342566561,
      "MotionCritic": -6.266539573669434,
      "VeMo (human-opt view)": 0.2692867540029112,
      "VeMo (max entropy view)": 0.3346613545816733,
      "VeMo (min entropy view)": 0.2692867540029112,
      "VeMo (random view)": 0.3346613545816733,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person jauntily skips forward."
  },
  "000085": {
    "text": "using their left hand, the person holds the neck of an air guitar, and with their right hand, they make strumming motions.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.10775501081535638,
      "Minus Multimodal Distance": -4.657073020935059,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.962573828175664e-05,
      "MoBERT-F": 0.2677705032162958,
      "MoBERT-N": 0.3806853744150377,
      "MoBERT-min(F/N)": 0.2677705032162958,
      "MoBERT-max(F/N)": 0.3806853744150377,
      "MotionCritic": -6.866246223449707,
      "VeMo (human-opt view)": 0.21282798833819241,
      "VeMo (max entropy view)": 0.21282798833819241,
      "VeMo (min entropy view)": 0.07160493827160494,
      "VeMo (random view)": 0.21282798833819241,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Using their left hand, the person holds the neck of an air guitar, and with their right hand, they make strumming motions."
  },
  "009493": {
    "text": "a person staggers around drunk and repeatedly gestures down with their right hand, as if mumbling an imaginary argument to themselves.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9268783779483482,
      "Minus Multimodal Distance": -8.265846252441406,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.1161612898577005e-05,
      "MoBERT-F": 0.2729599898925205,
      "MoBERT-N": 0.45380218882430795,
      "MoBERT-min(F/N)": 0.2729599898925205,
      "MoBERT-max(F/N)": 0.45380218882430795,
      "MotionCritic": -9.89332103729248,
      "VeMo (human-opt view)": 0.7548022598870057,
      "VeMo (max entropy view)": 0.6924101198402131,
      "VeMo (min entropy view)": 0.7548022598870057,
      "VeMo (random view)": 0.7548022598870057,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person staggers around drunk and repeatedly gestures downward with their right hand, as if mumbling about an imaginary argument to themselves."
  },
  "003645": {
    "text": "a person pauses briefly then casually walks downward in a straight line, and stands in a relaxed pose.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3300456389602911,
      "Minus Multimodal Distance": -6.685983657836914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.902372453827411e-05,
      "MoBERT-F": 0.34484573639080807,
      "MoBERT-N": 0.36593908302701944,
      "MoBERT-min(F/N)": 0.34484573639080807,
      "MoBERT-max(F/N)": 0.36593908302701944,
      "MotionCritic": -5.546013832092285,
      "VeMo (human-opt view)": 0.9525909592061742,
      "VeMo (max entropy view)": 0.9240246406570842,
      "VeMo (min entropy view)": 0.9525909592061742,
      "VeMo (random view)": 0.9240246406570842,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person pauses briefly, then casually walks downward in a straight line and stands in a relaxed pose."
  },
  "010810": {
    "text": "a person sitting on the floor scratches their head and then leans back on their left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5570695387177237,
      "Minus Multimodal Distance": -7.53040075302124,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.2815314531326294,
      "MoBERT-F": 0.46496305003640404,
      "MoBERT-N": 0.4927956221133084,
      "MoBERT-min(F/N)": 0.46496305003640404,
      "MoBERT-max(F/N)": 0.4927956221133084,
      "MotionCritic": -13.095550537109375,
      "VeMo (human-opt view)": 0.78748370273794,
      "VeMo (max entropy view)": 0.78748370273794,
      "VeMo (min entropy view)": 0.9465783664459161,
      "VeMo (random view)": 0.78748370273794,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sitting on the floor scratches their head and then leans back on their left hand."
  },
  "009283": {
    "text": "someone gets up from the floor, seems to be talking on the phone and starts to walk",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0235189985573105,
      "Minus Multimodal Distance": -2.224961519241333,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.019615374505519867,
      "MoBERT-F": 0.45200279855207304,
      "MoBERT-N": 0.5062973658324788,
      "MoBERT-min(F/N)": 0.45200279855207304,
      "MoBERT-max(F/N)": 0.5062973658324788,
      "MotionCritic": -4.073846340179443,
      "VeMo (human-opt view)": 0.5925925925925926,
      "VeMo (max entropy view)": 0.4383561643835616,
      "VeMo (min entropy view)": 0.5925925925925926,
      "VeMo (random view)": 0.5925925925925926,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone gets up from the floor, seems to be talking on the phone, and starts to walk."
  },
  "011211": {
    "text": "the man is using his arms and legs to propel him self forward in a running fashion.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8127121236153257,
      "Minus Multimodal Distance": -11.913200378417969,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.7441095628309995e-05,
      "MoBERT-F": 0.5095818000235578,
      "MoBERT-N": 0.5183764759286271,
      "MoBERT-min(F/N)": 0.5095818000235578,
      "MoBERT-max(F/N)": 0.5183764759286271,
      "MotionCritic": -4.489181041717529,
      "VeMo (human-opt view)": 0.9784270426363173,
      "VeMo (max entropy view)": 0.939994978659302,
      "VeMo (min entropy view)": 0.9784270426363173,
      "VeMo (random view)": 0.9784270426363173,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is using his arms and legs to propel himself forward in a running fashion."
  },
  "007216": {
    "text": "a person looks to be petting a dog with right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3696385434620642,
      "Minus Multimodal Distance": -10.634350776672363,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.276020288467407e-05,
      "MoBERT-F": 0.2839494703448898,
      "MoBERT-N": 0.38250946291413,
      "MoBERT-min(F/N)": 0.2839494703448898,
      "MoBERT-max(F/N)": 0.38250946291413,
      "MotionCritic": -11.478289604187012,
      "VeMo (human-opt view)": 0.0005878112349581323,
      "VeMo (max entropy view)": 0.0005878112349581323,
      "VeMo (min entropy view)": 0.0004316603225679865,
      "VeMo (random view)": 0.0005878112349581323,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person looks to be petting a dog with their right hand."
  },
  "007450": {
    "text": "the person had his hands up in his rocking back-and-forth.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7617406984893161,
      "Minus Multimodal Distance": -7.387147903442383,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7461293939268216e-05,
      "MoBERT-F": 0.3918516436523853,
      "MoBERT-N": 0.4178740382147358,
      "MoBERT-min(F/N)": 0.3918516436523853,
      "MoBERT-max(F/N)": 0.4178740382147358,
      "MotionCritic": -5.788623332977295,
      "VeMo (human-opt view)": 0.7316017316017316,
      "VeMo (max entropy view)": 0.7316017316017316,
      "VeMo (min entropy view)": 0.8813114754098361,
      "VeMo (random view)": 0.8813114754098361,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person had his hands up as he was rocking back and forth."
  },
  "002122": {
    "text": "a person stretches their back by turning their arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5796993716317025,
      "Minus Multimodal Distance": -8.477640151977539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.985515599604696e-05,
      "MoBERT-F": 0.5212641780720303,
      "MoBERT-N": 0.4706640248030901,
      "MoBERT-min(F/N)": 0.4706640248030901,
      "MoBERT-max(F/N)": 0.5212641780720303,
      "MotionCritic": -10.505454063415527,
      "VeMo (human-opt view)": 0.7772795216741405,
      "VeMo (max entropy view)": 0.7772795216741405,
      "VeMo (min entropy view)": 0.887459807073955,
      "VeMo (random view)": 0.887459807073955,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stretches their back by rotating their arms."
  },
  "002868": {
    "text": "a person does a drumming movement with both hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2856283608437164,
      "Minus Multimodal Distance": -2.984661340713501,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00018134285346604884,
      "MoBERT-F": 0.5359268651695908,
      "MoBERT-N": 0.5934846599108796,
      "MoBERT-min(F/N)": 0.5359268651695908,
      "MoBERT-max(F/N)": 0.5934846599108796,
      "MotionCritic": -9.20850944519043,
      "VeMo (human-opt view)": 0.9101449275362319,
      "VeMo (max entropy view)": 0.8436363636363636,
      "VeMo (min entropy view)": 0.9101449275362319,
      "VeMo (random view)": 0.8436363636363636,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person makes a drumming movement with both hands."
  },
  "005610": {
    "text": "person runs backwards performing an a like pattern",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.303410974479021,
      "Minus Multimodal Distance": -5.723556041717529,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.23359955847263336,
      "MoBERT-F": 0.5285578906485596,
      "MoBERT-N": 0.5825895201313497,
      "MoBERT-min(F/N)": 0.5285578906485596,
      "MoBERT-max(F/N)": 0.5825895201313497,
      "MotionCritic": -8.296083450317383,
      "VeMo (human-opt view)": 0.4524421593830334,
      "VeMo (max entropy view)": 0.4524421593830334,
      "VeMo (min entropy view)": 0.7060869565217391,
      "VeMo (random view)": 0.7060869565217391,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs backwards, performing an \"A\"-like pattern."
  },
  "008431": {
    "text": "the person is practing balancing on one leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7075570434469552,
      "Minus Multimodal Distance": -5.556171894073486,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00037811053334735334,
      "MoBERT-F": 0.60472324521547,
      "MoBERT-N": 0.5717783290747864,
      "MoBERT-min(F/N)": 0.5717783290747864,
      "MoBERT-max(F/N)": 0.60472324521547,
      "MotionCritic": -11.322973251342773,
      "VeMo (human-opt view)": 0.9668962452788269,
      "VeMo (max entropy view)": 0.9668962452788269,
      "VeMo (min entropy view)": 0.9706884798909339,
      "VeMo (random view)": 0.9706884798909339,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is practicing balancing on one leg."
  },
  "011632": {
    "text": "a person takes one large step forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2386554345176183,
      "Minus Multimodal Distance": -7.591957092285156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2613314285990782e-05,
      "MoBERT-F": 0.38250648987761493,
      "MoBERT-N": 0.5324156271026854,
      "MoBERT-min(F/N)": 0.38250648987761493,
      "MoBERT-max(F/N)": 0.5324156271026854,
      "MotionCritic": -3.3825411796569824,
      "VeMo (human-opt view)": 0.9829950630828305,
      "VeMo (max entropy view)": 0.9770104497955475,
      "VeMo (min entropy view)": 0.9829950630828305,
      "VeMo (random view)": 0.9770104497955475,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes one large step forward."
  },
  "012117": {
    "text": "a person lifting their left arm up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2698237104476948,
      "Minus Multimodal Distance": -12.517285346984863,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.022666300646961e-05,
      "MoBERT-F": 0.32080287934124746,
      "MoBERT-N": 0.38810697066679334,
      "MoBERT-min(F/N)": 0.32080287934124746,
      "MoBERT-max(F/N)": 0.38810697066679334,
      "MotionCritic": -8.665552139282227,
      "VeMo (human-opt view)": 0.9466300701433363,
      "VeMo (max entropy view)": 0.9466300701433363,
      "VeMo (min entropy view)": 0.9859212405631503,
      "VeMo (random view)": 0.9466300701433363,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is lifting their left arm up."
  },
  "003771": {
    "text": "a person acting like a tiger",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8133529073707187,
      "Minus Multimodal Distance": -9.082758903503418,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0003730901808012277,
      "MoBERT-F": 0.5572560751228475,
      "MoBERT-N": 0.5151877173750428,
      "MoBERT-min(F/N)": 0.5151877173750428,
      "MoBERT-max(F/N)": 0.5572560751228475,
      "MotionCritic": -3.6048004627227783,
      "VeMo (human-opt view)": 0.7666034155597723,
      "VeMo (max entropy view)": 0.6648122392211405,
      "VeMo (min entropy view)": 0.7666034155597723,
      "VeMo (random view)": 0.7666034155597723,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person acting like a tiger."
  },
  "006177": {
    "text": "waving hands in and out.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2535122339621758,
      "Minus Multimodal Distance": -3.4859886169433594,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.13267191813793e-05,
      "MoBERT-F": 0.4505270178125377,
      "MoBERT-N": 0.4641394870748409,
      "MoBERT-min(F/N)": 0.4505270178125377,
      "MoBERT-max(F/N)": 0.4641394870748409,
      "MotionCritic": -10.022588729858398,
      "VeMo (human-opt view)": 0.9867460102786043,
      "VeMo (max entropy view)": 0.9867460102786043,
      "VeMo (min entropy view)": 0.9875577288780223,
      "VeMo (random view)": 0.9875577288780223,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is waving hands in and out."
  },
  "003782": {
    "text": "person walked forward picked up an item and walked to the back and set the item down",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5706506897498909,
      "Minus Multimodal Distance": -5.426483154296875,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0003316480142530054,
      "MoBERT-F": 0.3911573461075145,
      "MoBERT-N": 0.5810457948866836,
      "MoBERT-min(F/N)": 0.3911573461075145,
      "MoBERT-max(F/N)": 0.5810457948866836,
      "MotionCritic": -7.503255367279053,
      "VeMo (human-opt view)": 0.0029747925473618286,
      "VeMo (max entropy view)": 0.0029747925473618286,
      "VeMo (min entropy view)": 0.0010957586123954352,
      "VeMo (random view)": 0.0029747925473618286,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walked forward, picked up an item, walked to the back, and set the item down."
  },
  "000715": {
    "text": "person steps left forward pivots on leg and turns around walks back and faces other direction",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6408543950764577,
      "Minus Multimodal Distance": -5.879288196563721,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.04191609472036362,
      "MoBERT-F": 0.5110054652397599,
      "MoBERT-N": 0.5552436931685458,
      "MoBERT-min(F/N)": 0.5110054652397599,
      "MoBERT-max(F/N)": 0.5552436931685458,
      "MotionCritic": -11.932307243347168,
      "VeMo (human-opt view)": 0.8808227465214761,
      "VeMo (max entropy view)": 0.7879924953095685,
      "VeMo (min entropy view)": 0.8808227465214761,
      "VeMo (random view)": 0.8808227465214761,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps forward to the left, pivots on one leg, turns around, walks back, and faces the other direction."
  },
  "011827": {
    "text": "a person bends down, picks something up, sticks it in the ground and rotates it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0921525918142734,
      "Minus Multimodal Distance": -4.746299743652344,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.1860111951828003,
      "MoBERT-F": 0.52118374375016,
      "MoBERT-N": 0.5374875787945236,
      "MoBERT-min(F/N)": 0.52118374375016,
      "MoBERT-max(F/N)": 0.5374875787945236,
      "MotionCritic": -5.627604007720947,
      "VeMo (human-opt view)": 0.14,
      "VeMo (max entropy view)": 0.21203830369357046,
      "VeMo (min entropy view)": 0.14,
      "VeMo (random view)": 0.21203830369357046,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends down, picks something up, sticks it in the ground, and rotates it."
  },
  "001193": {
    "text": "a man jumps once and then wobbles a little while moving legs apart.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4084516392670014,
      "Minus Multimodal Distance": -5.766691207885742,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00035680868313647807,
      "MoBERT-F": 0.5942995200306634,
      "MoBERT-N": 0.7081530558798119,
      "MoBERT-min(F/N)": 0.5942995200306634,
      "MoBERT-max(F/N)": 0.7081530558798119,
      "MotionCritic": -9.360223770141602,
      "VeMo (human-opt view)": 0.34798534798534797,
      "VeMo (max entropy view)": 0.34798534798534797,
      "VeMo (min entropy view)": 0.23387872954764197,
      "VeMo (random view)": 0.34798534798534797,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man jumps once and then wobbles a little while moving his legs apart."
  },
  "011504": {
    "text": "a person is stepping around while their hands are behind their back",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9195083430203925,
      "Minus Multimodal Distance": -7.097456455230713,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.856315743178129e-05,
      "MoBERT-F": 0.4945063336330513,
      "MoBERT-N": 0.47928615260513246,
      "MoBERT-min(F/N)": 0.47928615260513246,
      "MoBERT-max(F/N)": 0.4945063336330513,
      "MotionCritic": -5.037291049957275,
      "VeMo (human-opt view)": 0.2016895459345301,
      "VeMo (max entropy view)": 0.7877629063097514,
      "VeMo (min entropy view)": 0.2016895459345301,
      "VeMo (random view)": 0.7877629063097514,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is stepping around while their hands are behind their back."
  },
  "013778": {
    "text": "a person sits down, turns to their right, then stands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2657059098381587,
      "Minus Multimodal Distance": -8.87187671661377,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0011322522768750787,
      "MoBERT-F": 0.4461058606814851,
      "MoBERT-N": 0.5097712449200883,
      "MoBERT-min(F/N)": 0.4461058606814851,
      "MoBERT-max(F/N)": 0.5097712449200883,
      "MotionCritic": -10.519575119018555,
      "VeMo (human-opt view)": 0.860015467904099,
      "VeMo (max entropy view)": 0.8266666666666667,
      "VeMo (min entropy view)": 0.860015467904099,
      "VeMo (random view)": 0.860015467904099,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits down, turns to their right, then stands."
  },
  "009654": {
    "text": "someone waits a moment and jumps to the right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2916467539307838,
      "Minus Multimodal Distance": -7.239222049713135,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.421966877998784e-05,
      "MoBERT-F": 0.30264149702170173,
      "MoBERT-N": 0.41401529750732474,
      "MoBERT-min(F/N)": 0.30264149702170173,
      "MoBERT-max(F/N)": 0.41401529750732474,
      "MotionCritic": -6.920711040496826,
      "VeMo (human-opt view)": 0.32051282051282054,
      "VeMo (max entropy view)": 0.6786516853932584,
      "VeMo (min entropy view)": 0.32051282051282054,
      "VeMo (random view)": 0.6786516853932584,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone waits for a moment and jumps to the right."
  },
  "003440": {
    "text": "the person is moving from side to side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8125799540380378,
      "Minus Multimodal Distance": -3.114804983139038,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.4967423975467682,
      "MoBERT-F": 0.707131417785722,
      "MoBERT-N": 0.6441341701262603,
      "MoBERT-min(F/N)": 0.6441341701262603,
      "MoBERT-max(F/N)": 0.707131417785722,
      "MotionCritic": -2.7080307006835938,
      "VeMo (human-opt view)": 0.9796632407609884,
      "VeMo (max entropy view)": 0.9626188896261889,
      "VeMo (min entropy view)": 0.9796632407609884,
      "VeMo (random view)": 0.9796632407609884,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is moving from side to side."
  },
  "005266": {
    "text": "a figure walks and spins on their heel.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4163119392110957,
      "Minus Multimodal Distance": -11.186176300048828,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.006588398478925228,
      "MoBERT-F": 0.6119500998515613,
      "MoBERT-N": 0.6798105549702975,
      "MoBERT-min(F/N)": 0.6119500998515613,
      "MoBERT-max(F/N)": 0.6798105549702975,
      "MotionCritic": -12.051473617553711,
      "VeMo (human-opt view)": 0.6219239373601789,
      "VeMo (max entropy view)": 0.468682505399568,
      "VeMo (min entropy view)": 0.6219239373601789,
      "VeMo (random view)": 0.6219239373601789,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks and spins on their heel."
  },
  "014215": {
    "text": "a person stands still and does not move.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.20084680951213355,
      "Minus Multimodal Distance": -11.015833854675293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.360008799470961e-05,
      "MoBERT-F": 0.2973943072304941,
      "MoBERT-N": 0.39384549001925756,
      "MoBERT-min(F/N)": 0.2973943072304941,
      "MoBERT-max(F/N)": 0.39384549001925756,
      "MotionCritic": -12.946515083312988,
      "VeMo (human-opt view)": 0.9964162810015427,
      "VeMo (max entropy view)": 0.9944506104328524,
      "VeMo (min entropy view)": 0.9964162810015427,
      "VeMo (random view)": 0.9964162810015427,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still and does not move."
  },
  "008725": {
    "text": "a man walks forward in a straight line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4883261702307332,
      "Minus Multimodal Distance": -5.399500846862793,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.34818162425654e-05,
      "MoBERT-F": 0.41606172660157037,
      "MoBERT-N": 0.5790718353094149,
      "MoBERT-min(F/N)": 0.41606172660157037,
      "MoBERT-max(F/N)": 0.5790718353094149,
      "MotionCritic": -1.5096532106399536,
      "VeMo (human-opt view)": 0.9796376913354866,
      "VeMo (max entropy view)": 0.9796376913354866,
      "VeMo (min entropy view)": 0.9914172074523759,
      "VeMo (random view)": 0.9796376913354866,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward in a straight line."
  },
  "003785": {
    "text": "a person is scrubbing a window",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.38575790894267736,
      "Minus Multimodal Distance": -9.169466018676758,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.788122896570712e-05,
      "MoBERT-F": 0.3029812833927019,
      "MoBERT-N": 0.39690305316464264,
      "MoBERT-min(F/N)": 0.3029812833927019,
      "MoBERT-max(F/N)": 0.39690305316464264,
      "MotionCritic": -6.659245491027832,
      "VeMo (human-opt view)": 1.613323643024737e-05,
      "VeMo (max entropy view)": 1.613323643024737e-05,
      "VeMo (min entropy view)": 1.613323643024737e-05,
      "VeMo (random view)": 1.613323643024737e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is scrubbing a window."
  },
  "002888": {
    "text": "a person walking down a stage.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6898738382053364,
      "Minus Multimodal Distance": -11.717881202697754,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.693600203609094e-05,
      "MoBERT-F": 0.32906089112839615,
      "MoBERT-N": 0.4443246390827644,
      "MoBERT-min(F/N)": 0.32906089112839615,
      "MoBERT-max(F/N)": 0.4443246390827644,
      "MotionCritic": -6.123112201690674,
      "VeMo (human-opt view)": 0.9688722571865963,
      "VeMo (max entropy view)": 0.9467084639498433,
      "VeMo (min entropy view)": 0.9688722571865963,
      "VeMo (random view)": 0.9688722571865963,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking down a stage."
  },
  "004707": {
    "text": "the figure curls its arms outwards from its chest, it lowers its arms in the motion towards its groin and then raises them upwards at head level.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5053605644537373,
      "Minus Multimodal Distance": -4.060651779174805,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.598829400492832e-05,
      "MoBERT-F": 0.32750895350531717,
      "MoBERT-N": 0.4418982268805947,
      "MoBERT-min(F/N)": 0.32750895350531717,
      "MoBERT-max(F/N)": 0.4418982268805947,
      "MotionCritic": -11.035249710083008,
      "VeMo (human-opt view)": 0.6916299559471366,
      "VeMo (max entropy view)": 0.6916299559471366,
      "VeMo (min entropy view)": 0.7773972602739726,
      "VeMo (random view)": 0.7773972602739726,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person curls their arms outwards from their chest. They lower their arms in a motion towards their groin and then raise them upwards to head level."
  },
  "002247": {
    "text": "a person walking and helping maintain their balance and support,  from holding onto a side rail or wall.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.444304052926017,
      "Minus Multimodal Distance": -3.117184638977051,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.406819450901821e-05,
      "MoBERT-F": 0.33113566742503364,
      "MoBERT-N": 0.4772986531780208,
      "MoBERT-min(F/N)": 0.33113566742503364,
      "MoBERT-max(F/N)": 0.4772986531780208,
      "MotionCritic": -10.032623291015625,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.5776173285198556,
      "VeMo (random view)": 0.5776173285198556,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking and helps maintain their balance and support by holding onto a side rail or wall."
  },
  "001029": {
    "text": "someone is walking forward and holding a handrail very carefully, as if they are afraid of falling.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5588303559028226,
      "Minus Multimodal Distance": -3.6936323642730713,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.3114745494676754e-05,
      "MoBERT-F": 0.31654023544981297,
      "MoBERT-N": 0.44219990424982336,
      "MoBERT-min(F/N)": 0.31654023544981297,
      "MoBERT-max(F/N)": 0.44219990424982336,
      "MotionCritic": -8.084394454956055,
      "VeMo (human-opt view)": 0.00021674178356206005,
      "VeMo (max entropy view)": 0.00026129387053192547,
      "VeMo (min entropy view)": 0.00021674178356206005,
      "VeMo (random view)": 0.00021674178356206005,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is walking forward and holding a handrail very carefully, as if they're afraid of falling."
  },
  "004067": {
    "text": "a person reaches forward with the right hand and then lets their arm drop to their side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2696842353807765,
      "Minus Multimodal Distance": -8.48318862915039,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.5633544030133635e-05,
      "MoBERT-F": 0.269759987684981,
      "MoBERT-N": 0.3937882929101942,
      "MoBERT-min(F/N)": 0.269759987684981,
      "MoBERT-max(F/N)": 0.3937882929101942,
      "MotionCritic": -9.635103225708008,
      "VeMo (human-opt view)": 0.9819716148830073,
      "VeMo (max entropy view)": 0.9097472924187726,
      "VeMo (min entropy view)": 0.9819716148830073,
      "VeMo (random view)": 0.9819716148830073,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person reaches forward with their right hand and then lets their arm drop to their side."
  },
  "008016": {
    "text": "a man reaches over and picks up some wet soap, and washes his hands with it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6796796194358284,
      "Minus Multimodal Distance": -10.466813087463379,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.559933975338936e-05,
      "MoBERT-F": 0.4130645574821644,
      "MoBERT-N": 0.4222564642948579,
      "MoBERT-min(F/N)": 0.4130645574821644,
      "MoBERT-max(F/N)": 0.4222564642948579,
      "MotionCritic": -0.027310766279697418,
      "VeMo (human-opt view)": 0.10647803425167536,
      "VeMo (max entropy view)": 0.10647803425167536,
      "VeMo (min entropy view)": 9.340658396077152e-05,
      "VeMo (random view)": 9.340658396077152e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man reaches over, picks up some wet soap, and washes his hands with it."
  },
  "002055": {
    "text": "a person bends at the waist, picks something up, then puts it down at waist level",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3386465902573315,
      "Minus Multimodal Distance": -9.769379615783691,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.341202889510896e-05,
      "MoBERT-F": 0.3584757315741427,
      "MoBERT-N": 0.41690028997612866,
      "MoBERT-min(F/N)": 0.3584757315741427,
      "MoBERT-max(F/N)": 0.41690028997612866,
      "MotionCritic": -4.286733627319336,
      "VeMo (human-opt view)": 0.9436298468556533,
      "VeMo (max entropy view)": 0.8990825688073395,
      "VeMo (min entropy view)": 0.9436298468556533,
      "VeMo (random view)": 0.8990825688073395,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person bends at the waist, picks something up, and then puts it down at waist level."
  },
  "014445": {
    "text": "a person raises both arms to his face, then drops them back to his sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2506806288518844,
      "Minus Multimodal Distance": -3.959980010986328,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.28505263698753e-05,
      "MoBERT-F": 0.2850623345965441,
      "MoBERT-N": 0.396770562548328,
      "MoBERT-min(F/N)": 0.2850623345965441,
      "MoBERT-max(F/N)": 0.396770562548328,
      "MotionCritic": -11.44791316986084,
      "VeMo (human-opt view)": 0.9996845348056773,
      "VeMo (max entropy view)": 0.9889869111457072,
      "VeMo (min entropy view)": 0.9996845348056773,
      "VeMo (random view)": 0.9889869111457072,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises both arms to their face, then drops them back to their sides."
  },
  "012495": {
    "text": "a person walks quickly forward, moving at a slight angle to the right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5032910847342396,
      "Minus Multimodal Distance": -3.3349645137786865,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8996482797083445e-05,
      "MoBERT-F": 0.36377813586867935,
      "MoBERT-N": 0.42634102969938087,
      "MoBERT-min(F/N)": 0.36377813586867935,
      "MoBERT-max(F/N)": 0.42634102969938087,
      "MotionCritic": -1.5146660804748535,
      "VeMo (human-opt view)": 0.8522483940042827,
      "VeMo (max entropy view)": 0.8350877192982457,
      "VeMo (min entropy view)": 0.8522483940042827,
      "VeMo (random view)": 0.8350877192982457,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks quickly forward, moving at a slight angle to the right."
  },
  "012558": {
    "text": "a person walks forward and then pulls something behind them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0337178398559028,
      "Minus Multimodal Distance": -7.874589920043945,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5885370632749982e-05,
      "MoBERT-F": 0.40946982199415183,
      "MoBERT-N": 0.4725228431521423,
      "MoBERT-min(F/N)": 0.40946982199415183,
      "MoBERT-max(F/N)": 0.4725228431521423,
      "MotionCritic": -6.941680908203125,
      "VeMo (human-opt view)": 0.09557618787547788,
      "VeMo (max entropy view)": 0.22330097087378642,
      "VeMo (min entropy view)": 0.09557618787547788,
      "VeMo (random view)": 0.09557618787547788,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and then pulls something behind them."
  },
  "005364": {
    "text": "a person walks forward as their right hand guides them",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5099070657462021,
      "Minus Multimodal Distance": -12.036996841430664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5706845917738974e-05,
      "MoBERT-F": 0.3727421324181077,
      "MoBERT-N": 0.4443318132319025,
      "MoBERT-min(F/N)": 0.3727421324181077,
      "MoBERT-max(F/N)": 0.4443318132319025,
      "MotionCritic": -6.423940181732178,
      "VeMo (human-opt view)": 0.4079601990049751,
      "VeMo (max entropy view)": 0.4079601990049751,
      "VeMo (min entropy view)": 0.34831460674157305,
      "VeMo (random view)": 0.4079601990049751,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward as their right hand guides them."
  },
  "003934": {
    "text": "a person stands, waving back and forth with one arm.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.33154410749665014,
      "Minus Multimodal Distance": -3.258647918701172,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.580850170692429e-05,
      "MoBERT-F": 0.38288357734191864,
      "MoBERT-N": 0.4296948564692866,
      "MoBERT-min(F/N)": 0.38288357734191864,
      "MoBERT-max(F/N)": 0.4296948564692866,
      "MotionCritic": -6.172448635101318,
      "VeMo (human-opt view)": 0.9940864445064846,
      "VeMo (max entropy view)": 0.986784140969163,
      "VeMo (min entropy view)": 0.9940864445064846,
      "VeMo (random view)": 0.9940864445064846,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands, waving back and forth with one arm."
  },
  "008315": {
    "text": "he is self balancing while walking through a narrow bridge.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.45593932473765353,
      "Minus Multimodal Distance": -7.682748794555664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0059370240196585655,
      "MoBERT-F": 0.6615255206251973,
      "MoBERT-N": 0.6035891233850749,
      "MoBERT-min(F/N)": 0.6035891233850749,
      "MoBERT-max(F/N)": 0.6615255206251973,
      "MotionCritic": 1.7620080709457397,
      "VeMo (human-opt view)": 0.0008566232552599875,
      "VeMo (max entropy view)": 0.0008566232552599875,
      "VeMo (min entropy view)": 0.00033621342666294,
      "VeMo (random view)": 0.00033621342666294,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is self - balancing while walking across a narrow bridge."
  },
  "009289": {
    "text": "the person walked forward and leaned over the table.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5794371781548048,
      "Minus Multimodal Distance": -7.693653583526611,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.491506893420592e-05,
      "MoBERT-F": 0.3020571924492892,
      "MoBERT-N": 0.5501748304181835,
      "MoBERT-min(F/N)": 0.3020571924492892,
      "MoBERT-max(F/N)": 0.5501748304181835,
      "MotionCritic": -6.951165676116943,
      "VeMo (human-opt view)": 7.273523788184955e-05,
      "VeMo (max entropy view)": 0.0002532460761182075,
      "VeMo (min entropy view)": 7.273523788184955e-05,
      "VeMo (random view)": 7.273523788184955e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person walked forward and leaned over the table."
  },
  "003712": {
    "text": "the person crawls forward at a slow speed",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0361596293822175,
      "Minus Multimodal Distance": -6.077139854431152,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9969882369041443,
      "MoBERT-F": 0.8465039387968722,
      "MoBERT-N": 0.7475565857944086,
      "MoBERT-min(F/N)": 0.7475565857944086,
      "MoBERT-max(F/N)": 0.8465039387968722,
      "MotionCritic": -7.2282185554504395,
      "VeMo (human-opt view)": 0.9941041365756635,
      "VeMo (max entropy view)": 0.9928721174004193,
      "VeMo (min entropy view)": 0.9941041365756635,
      "VeMo (random view)": 0.9941041365756635,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person crawls forward at a slow speed."
  },
  "008583": {
    "text": "a person walks in a s shape",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8408646005288023,
      "Minus Multimodal Distance": -7.245343208312988,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.930715269641951e-05,
      "MoBERT-F": 0.47731385281825667,
      "MoBERT-N": 0.550491770714038,
      "MoBERT-min(F/N)": 0.47731385281825667,
      "MoBERT-max(F/N)": 0.550491770714038,
      "MotionCritic": -1.237687587738037,
      "VeMo (human-opt view)": 0.4226190476190476,
      "VeMo (max entropy view)": 0.4226190476190476,
      "VeMo (min entropy view)": 0.36259541984732824,
      "VeMo (random view)": 0.4226190476190476,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in an S shape."
  },
  "010850": {
    "text": "a figure waves with their right hand",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.26926313130609186,
      "Minus Multimodal Distance": -7.353980541229248,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.9882164855953306e-05,
      "MoBERT-F": 0.2967414140115805,
      "MoBERT-N": 0.32343143533289154,
      "MoBERT-min(F/N)": 0.2967414140115805,
      "MoBERT-max(F/N)": 0.32343143533289154,
      "MotionCritic": -11.391947746276855,
      "VeMo (human-opt view)": 0.966900702106319,
      "VeMo (max entropy view)": 0.9147788565264293,
      "VeMo (min entropy view)": 0.966900702106319,
      "VeMo (random view)": 0.966900702106319,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure waves with their right hand."
  },
  "010043": {
    "text": "the person is doing the cha-cha.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3789723359135943,
      "Minus Multimodal Distance": -8.72119426727295,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0013013416901230812,
      "MoBERT-F": 0.5636657251558531,
      "MoBERT-N": 0.5334806054741463,
      "MoBERT-min(F/N)": 0.5334806054741463,
      "MoBERT-max(F/N)": 0.5636657251558531,
      "MotionCritic": -9.32097053527832,
      "VeMo (human-opt view)": 0.0020487945335946932,
      "VeMo (max entropy view)": 0.0020487945335946932,
      "VeMo (min entropy view)": 0.0006660421779223045,
      "VeMo (random view)": 0.0006660421779223045,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is doing the cha - cha."
  },
  "001705": {
    "text": "a person is pitching a baseball.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5882063026047543,
      "Minus Multimodal Distance": -3.3655037879943848,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.848773121717386e-05,
      "MoBERT-F": 0.5114840688638779,
      "MoBERT-N": 0.5579089650091599,
      "MoBERT-min(F/N)": 0.5114840688638779,
      "MoBERT-max(F/N)": 0.5579089650091599,
      "MotionCritic": -7.458526134490967,
      "VeMo (human-opt view)": 0.46946564885496184,
      "VeMo (max entropy view)": 0.46946564885496184,
      "VeMo (min entropy view)": 0.5623342175066313,
      "VeMo (random view)": 0.5623342175066313,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is pitching a baseball."
  },
  "013646": {
    "text": "walking from side to side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3949811212647654,
      "Minus Multimodal Distance": -9.02924633026123,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.6080116842640564e-05,
      "MoBERT-F": 0.514258026166551,
      "MoBERT-N": 0.6082965453356607,
      "MoBERT-min(F/N)": 0.514258026166551,
      "MoBERT-max(F/N)": 0.6082965453356607,
      "MotionCritic": -7.903124809265137,
      "VeMo (human-opt view)": 0.8807692307692307,
      "VeMo (max entropy view)": 0.6928327645051194,
      "VeMo (min entropy view)": 0.8807692307692307,
      "VeMo (random view)": 0.8807692307692307,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking from side to side."
  },
  "011864": {
    "text": "a person, standing in the middle of the screen, raises their arms and waves twice before lowering their arms again.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.341123479851282,
      "Minus Multimodal Distance": -3.890608787536621,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0031963621731847525,
      "MoBERT-F": 0.4850524872238751,
      "MoBERT-N": 0.48438310687162844,
      "MoBERT-min(F/N)": 0.48438310687162844,
      "MoBERT-max(F/N)": 0.4850524872238751,
      "MotionCritic": -12.352312088012695,
      "VeMo (human-opt view)": 0.9770023790642347,
      "VeMo (max entropy view)": 0.9687560644284883,
      "VeMo (min entropy view)": 0.9770023790642347,
      "VeMo (random view)": 0.9770023790642347,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person, standing in the middle of the screen, raises their arms and waves twice before lowering their arms again."
  },
  "006839": {
    "text": "a person walks around and then crouches down with their arms forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.228027659306858,
      "Minus Multimodal Distance": -9.1907377243042,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4285323636140674e-05,
      "MoBERT-F": 0.3978512416170966,
      "MoBERT-N": 0.47649874349121435,
      "MoBERT-min(F/N)": 0.3978512416170966,
      "MoBERT-max(F/N)": 0.47649874349121435,
      "MotionCritic": -11.916749954223633,
      "VeMo (human-opt view)": 0.9947818363495059,
      "VeMo (max entropy view)": 0.993704537299801,
      "VeMo (min entropy view)": 0.9947818363495059,
      "VeMo (random view)": 0.993704537299801,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks around and then crouches down with their arms forward."
  },
  "014512": {
    "text": "a person walks backwards, then turns around then walks backwards again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6370059015082105,
      "Minus Multimodal Distance": -3.9293885231018066,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9960991144180298,
      "MoBERT-F": 0.6980639691860627,
      "MoBERT-N": 0.7294257233536254,
      "MoBERT-min(F/N)": 0.6980639691860627,
      "MoBERT-max(F/N)": 0.7294257233536254,
      "MotionCritic": -13.037402153015137,
      "VeMo (human-opt view)": 0.8352490421455939,
      "VeMo (max entropy view)": 0.5308219178082192,
      "VeMo (min entropy view)": 0.8352490421455939,
      "VeMo (random view)": 0.8352490421455939,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks backwards, then turns around, then walks backwards again."
  },
  "000118": {
    "text": "person walks forwards slowly and normally without swinging arms",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.23220413048771288,
      "Minus Multimodal Distance": -2.9100184440612793,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 7.025674858596176e-05,
      "MoBERT-F": 0.36813653609067487,
      "MoBERT-N": 0.5373382089791573,
      "MoBERT-min(F/N)": 0.36813653609067487,
      "MoBERT-max(F/N)": 0.5373382089791573,
      "MotionCritic": -5.783731937408447,
      "VeMo (human-opt view)": 0.932620320855615,
      "VeMo (max entropy view)": 0.9049773755656109,
      "VeMo (min entropy view)": 0.932620320855615,
      "VeMo (random view)": 0.932620320855615,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward slowly and normally without swinging their arms."
  },
  "003174": {
    "text": "a person is walking across a narrow beam.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6682831428843066,
      "Minus Multimodal Distance": -7.650737762451172,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.8123038646299392e-05,
      "MoBERT-F": 0.5387306249030628,
      "MoBERT-N": 0.5200840252858191,
      "MoBERT-min(F/N)": 0.5200840252858191,
      "MoBERT-max(F/N)": 0.5387306249030628,
      "MotionCritic": -5.279386043548584,
      "VeMo (human-opt view)": 0.0059194014301273854,
      "VeMo (max entropy view)": 0.020250723240115717,
      "VeMo (min entropy view)": 0.0059194014301273854,
      "VeMo (random view)": 0.020250723240115717,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking across a narrow beam."
  },
  "007970": {
    "text": "the person was pushed but didnt fall",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.29304314124029407,
      "Minus Multimodal Distance": -6.078224182128906,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3402119040838443e-05,
      "MoBERT-F": 0.34858864927192995,
      "MoBERT-N": 0.44590401454763273,
      "MoBERT-min(F/N)": 0.34858864927192995,
      "MoBERT-max(F/N)": 0.44590401454763273,
      "MotionCritic": -6.337631702423096,
      "VeMo (human-opt view)": 0.8671808054841473,
      "VeMo (max entropy view)": 0.8080808080808081,
      "VeMo (min entropy view)": 0.8671808054841473,
      "VeMo (random view)": 0.8080808080808081,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was pushed but didn't fall."
  },
  "009871": {
    "text": "a man side-skips from left to right repeatedly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.558095849187221,
      "Minus Multimodal Distance": -6.801329612731934,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9947775602340698,
      "MoBERT-F": 0.847443742118595,
      "MoBERT-N": 0.733998957991166,
      "MoBERT-min(F/N)": 0.733998957991166,
      "MoBERT-max(F/N)": 0.847443742118595,
      "MotionCritic": -4.3279194831848145,
      "VeMo (human-opt view)": 0.9498525073746312,
      "VeMo (max entropy view)": 0.9197769197769198,
      "VeMo (min entropy view)": 0.9498525073746312,
      "VeMo (random view)": 0.9498525073746312,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man side - skips from left to right repeatedly."
  },
  "013969": {
    "text": "the person is getting ready to watch tv.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.43821768600247707,
      "Minus Multimodal Distance": -7.57256555557251,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.2294788979925215e-05,
      "MoBERT-F": 0.36134242324706156,
      "MoBERT-N": 0.4800024551513194,
      "MoBERT-min(F/N)": 0.36134242324706156,
      "MoBERT-max(F/N)": 0.4800024551513194,
      "MotionCritic": -10.35477352142334,
      "VeMo (human-opt view)": 0.172978505629478,
      "VeMo (max entropy view)": 0.3629032258064516,
      "VeMo (min entropy view)": 0.172978505629478,
      "VeMo (random view)": 0.3629032258064516,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is getting ready to watch TV."
  },
  "001614": {
    "text": "a person walks while dragging his feet.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.828336574796903,
      "Minus Multimodal Distance": -5.242754936218262,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00033266228274442255,
      "MoBERT-F": 0.45290035612577795,
      "MoBERT-N": 0.5412697298854174,
      "MoBERT-min(F/N)": 0.45290035612577795,
      "MoBERT-max(F/N)": 0.5412697298854174,
      "MotionCritic": -3.0414934158325195,
      "VeMo (human-opt view)": 0.808199121522694,
      "VeMo (max entropy view)": 0.808199121522694,
      "VeMo (min entropy view)": 0.8598781549173194,
      "VeMo (random view)": 0.808199121522694,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks while dragging their feet."
  },
  "011805": {
    "text": "the  person was walking very fast.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.28171252298116245,
      "Minus Multimodal Distance": -11.270672798156738,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.334689219831489e-05,
      "MoBERT-F": 0.5309105588706118,
      "MoBERT-N": 0.5380695322067436,
      "MoBERT-min(F/N)": 0.5309105588706118,
      "MoBERT-max(F/N)": 0.5380695322067436,
      "MotionCritic": -5.3294830322265625,
      "VeMo (human-opt view)": 0.03739837398373984,
      "VeMo (max entropy view)": 0.11939034716342083,
      "VeMo (min entropy view)": 0.03739837398373984,
      "VeMo (random view)": 0.11939034716342083,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was walking very fast."
  },
  "007820": {
    "text": "a person jumps and spins 180 degrees.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.537953403504149,
      "Minus Multimodal Distance": -14.26815128326416,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.6045737862586975,
      "MoBERT-F": 0.7030125510741332,
      "MoBERT-N": 0.704338464669582,
      "MoBERT-min(F/N)": 0.7030125510741332,
      "MoBERT-max(F/N)": 0.704338464669582,
      "MotionCritic": -11.497759819030762,
      "VeMo (human-opt view)": 0.015925315760571115,
      "VeMo (max entropy view)": 0.015925315760571115,
      "VeMo (min entropy view)": 0.0004292892857588178,
      "VeMo (random view)": 0.0004292892857588178,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps and spins 180 degrees."
  },
  "001808": {
    "text": "the person is standing there.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.18899204326028685,
      "Minus Multimodal Distance": -6.754134178161621,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5648247174103744e-05,
      "MoBERT-F": 0.4046221377464694,
      "MoBERT-N": 0.45183032595534117,
      "MoBERT-min(F/N)": 0.4046221377464694,
      "MoBERT-max(F/N)": 0.45183032595534117,
      "MotionCritic": -7.612436771392822,
      "VeMo (human-opt view)": 0.9964041569526219,
      "VeMo (max entropy view)": 0.9944605093544229,
      "VeMo (min entropy view)": 0.9964041569526219,
      "VeMo (random view)": 0.9944605093544229,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing there."
  },
  "001161": {
    "text": "person runs quickly straight forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9766004219468902,
      "Minus Multimodal Distance": -3.861130952835083,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.347830195503775e-05,
      "MoBERT-F": 0.46118817700462345,
      "MoBERT-N": 0.5663147294395569,
      "MoBERT-min(F/N)": 0.46118817700462345,
      "MoBERT-max(F/N)": 0.5663147294395569,
      "MotionCritic": -7.234933376312256,
      "VeMo (human-opt view)": 0.887055183084064,
      "VeMo (max entropy view)": 0.8085106382978723,
      "VeMo (min entropy view)": 0.887055183084064,
      "VeMo (random view)": 0.8085106382978723,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs quickly straight forward."
  },
  "007301": {
    "text": "moving hands and jumping.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6617983359800387,
      "Minus Multimodal Distance": -13.221548080444336,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9532604217529297,
      "MoBERT-F": 0.8556049640646365,
      "MoBERT-N": 0.7532012188157908,
      "MoBERT-min(F/N)": 0.7532012188157908,
      "MoBERT-max(F/N)": 0.8556049640646365,
      "MotionCritic": 0.8257890939712524,
      "VeMo (human-opt view)": 0.7551401869158878,
      "VeMo (max entropy view)": 0.6651053864168618,
      "VeMo (min entropy view)": 0.7551401869158878,
      "VeMo (random view)": 0.6651053864168618,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving hands and jumping."
  },
  "006712": {
    "text": "a person stands in a defensive stance with right arm and leg forward, then uses the right forearm for a block across the body.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5159999430158825,
      "Minus Multimodal Distance": -10.564906120300293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0003511748509481549,
      "MoBERT-F": 0.4841916226597874,
      "MoBERT-N": 0.5680564341681213,
      "MoBERT-min(F/N)": 0.4841916226597874,
      "MoBERT-max(F/N)": 0.5680564341681213,
      "MotionCritic": -3.441283702850342,
      "VeMo (human-opt view)": 0.788546255506608,
      "VeMo (max entropy view)": 0.7542372881355932,
      "VeMo (min entropy view)": 0.788546255506608,
      "VeMo (random view)": 0.788546255506608,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands in a defensive stance with the right arm and leg forward, then uses the right forearm to block across the body."
  },
  "004299": {
    "text": "a man is picking an object up from the left side and then places it to his right side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.12585305273360783,
      "Minus Multimodal Distance": -6.599660396575928,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7296176995150745e-05,
      "MoBERT-F": 0.31512906819261866,
      "MoBERT-N": 0.4462002342358201,
      "MoBERT-min(F/N)": 0.31512906819261866,
      "MoBERT-max(F/N)": 0.4462002342358201,
      "MotionCritic": -2.8747236728668213,
      "VeMo (human-opt view)": 0.6926070038910506,
      "VeMo (max entropy view)": 0.6926070038910506,
      "VeMo (min entropy view)": 0.7422680412371134,
      "VeMo (random view)": 0.6926070038910506,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is picking up an object from the left side and then placing it to his right side."
  },
  "001249": {
    "text": "a person steps forward and reaches down to grab or place something with their left hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.434793588687233,
      "Minus Multimodal Distance": -3.715146064758301,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4144110284396447e-05,
      "MoBERT-F": 0.3170308386375019,
      "MoBERT-N": 0.4782658766518006,
      "MoBERT-min(F/N)": 0.3170308386375019,
      "MoBERT-max(F/N)": 0.4782658766518006,
      "MotionCritic": -3.3935110569000244,
      "VeMo (human-opt view)": 0.928537170263789,
      "VeMo (max entropy view)": 0.7304015296367112,
      "VeMo (min entropy view)": 0.928537170263789,
      "VeMo (random view)": 0.7304015296367112,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps forward and reaches down to grab or place something with their left hand."
  },
  "002149": {
    "text": "a person steps forward, turns to the side and gestures several times.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7027236346703801,
      "Minus Multimodal Distance": -11.75798225402832,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.540743116696831e-05,
      "MoBERT-F": 0.32669468436417903,
      "MoBERT-N": 0.4844624654842396,
      "MoBERT-min(F/N)": 0.32669468436417903,
      "MoBERT-max(F/N)": 0.4844624654842396,
      "MotionCritic": -7.760011672973633,
      "VeMo (human-opt view)": 0.9048991354466859,
      "VeMo (max entropy view)": 0.8668730650154799,
      "VeMo (min entropy view)": 0.9048991354466859,
      "VeMo (random view)": 0.8668730650154799,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person steps forward, turns to the side, and gestures several times."
  },
  "004621": {
    "text": "slowly swinging the arms forward as the body walks slowly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3839809691010593,
      "Minus Multimodal Distance": -12.39379596710205,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.862164445919916e-05,
      "MoBERT-F": 0.30044054919971763,
      "MoBERT-N": 0.3722838430153122,
      "MoBERT-min(F/N)": 0.30044054919971763,
      "MoBERT-max(F/N)": 0.3722838430153122,
      "MotionCritic": -3.978874444961548,
      "VeMo (human-opt view)": 0.37755102040816324,
      "VeMo (max entropy view)": 0.37755102040816324,
      "VeMo (min entropy view)": 0.10067720090293454,
      "VeMo (random view)": 0.37755102040816324,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person slowly swings the arms forward as the body walks slowly."
  },
  "004438": {
    "text": "a person walks forward with their left leg limping.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.34573335652817305,
      "Minus Multimodal Distance": -10.95964527130127,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0002957430260721594,
      "MoBERT-F": 0.5360654961375027,
      "MoBERT-N": 0.5415645168426371,
      "MoBERT-min(F/N)": 0.5360654961375027,
      "MoBERT-max(F/N)": 0.5415645168426371,
      "MotionCritic": -4.3256916999816895,
      "VeMo (human-opt view)": 0.8081180811808119,
      "VeMo (max entropy view)": 0.7549668874172185,
      "VeMo (min entropy view)": 0.8081180811808119,
      "VeMo (random view)": 0.7549668874172185,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, limping with their left leg."
  },
  "013190": {
    "text": "a person beginning to run in a straight line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4591673075668682,
      "Minus Multimodal Distance": -5.9811835289001465,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.05238034948706627,
      "MoBERT-F": 0.5599926913768243,
      "MoBERT-N": 0.6208408996688702,
      "MoBERT-min(F/N)": 0.5599926913768243,
      "MoBERT-max(F/N)": 0.6208408996688702,
      "MotionCritic": -5.796412467956543,
      "VeMo (human-opt view)": 0.9796376913354866,
      "VeMo (max entropy view)": 0.9649309245483528,
      "VeMo (min entropy view)": 0.9796376913354866,
      "VeMo (random view)": 0.9796376913354866,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person begins to run in a straight line."
  },
  "010496": {
    "text": "a figure walks towards a wall and stares, confined.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.592670546279904,
      "Minus Multimodal Distance": -6.051819324493408,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.337656279560179e-05,
      "MoBERT-F": 0.4357239026748881,
      "MoBERT-N": 0.5036301168555136,
      "MoBERT-min(F/N)": 0.4357239026748881,
      "MoBERT-max(F/N)": 0.5036301168555136,
      "MotionCritic": -2.9720914363861084,
      "VeMo (human-opt view)": 0.453416149068323,
      "VeMo (max entropy view)": 0.453416149068323,
      "VeMo (min entropy view)": 0.7769230769230769,
      "VeMo (random view)": 0.7769230769230769,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure walks towards a wall and stares, confined."
  },
  "003027": {
    "text": "the person makes a right turn.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3867706205888537,
      "Minus Multimodal Distance": -9.654434204101562,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.342060543014668e-05,
      "MoBERT-F": 0.4272041377808834,
      "MoBERT-N": 0.5021933354579182,
      "MoBERT-min(F/N)": 0.4272041377808834,
      "MoBERT-max(F/N)": 0.5021933354579182,
      "MotionCritic": -2.5728628635406494,
      "VeMo (human-opt view)": 0.9466972711043101,
      "VeMo (max entropy view)": 0.9466972711043101,
      "VeMo (min entropy view)": 0.964964964964965,
      "VeMo (random view)": 0.964964964964965,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person makes a right turn."
  },
  "001897": {
    "text": "person aims and throws a baseball",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6726375024781411,
      "Minus Multimodal Distance": -7.660953044891357,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8267624378204346,
      "MoBERT-F": 0.7161414927570275,
      "MoBERT-N": 0.746389794292358,
      "MoBERT-min(F/N)": 0.7161414927570275,
      "MoBERT-max(F/N)": 0.746389794292358,
      "MotionCritic": -5.980282783508301,
      "VeMo (human-opt view)": 0.5616438356164384,
      "VeMo (max entropy view)": 0.46846846846846846,
      "VeMo (min entropy view)": 0.5616438356164384,
      "VeMo (random view)": 0.5616438356164384,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person aims and throws a baseball."
  },
  "013511": {
    "text": "stick figure starts running in place",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4987863108060479,
      "Minus Multimodal Distance": -3.8267745971679688,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7361695170402527,
      "MoBERT-F": 0.8303006456858721,
      "MoBERT-N": 0.7483525947586184,
      "MoBERT-min(F/N)": 0.7483525947586184,
      "MoBERT-max(F/N)": 0.8303006456858721,
      "MotionCritic": -5.9338226318359375,
      "VeMo (human-opt view)": 0.844,
      "VeMo (max entropy view)": 0.8174672489082969,
      "VeMo (min entropy view)": 0.844,
      "VeMo (random view)": 0.8174672489082969,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person starts running in place."
  },
  "010007": {
    "text": "the person is playing peekaboo.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7815943754693256,
      "Minus Multimodal Distance": -9.883489608764648,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.004612069111317396,
      "MoBERT-F": 0.5320604101677748,
      "MoBERT-N": 0.44392891877950724,
      "MoBERT-min(F/N)": 0.44392891877950724,
      "MoBERT-max(F/N)": 0.5320604101677748,
      "MotionCritic": -3.6343400478363037,
      "VeMo (human-opt view)": 0.18248175182481752,
      "VeMo (max entropy view)": 0.5626822157434402,
      "VeMo (min entropy view)": 0.18248175182481752,
      "VeMo (random view)": 0.18248175182481752,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is playing peek - a - boo."
  },
  "004521": {
    "text": "a person is making rapid swinging motions with their right leg in the air, while holding onto something with their right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7214924693040802,
      "Minus Multimodal Distance": -4.026851654052734,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6458326829015277e-05,
      "MoBERT-F": 0.42234653428499114,
      "MoBERT-N": 0.34188130087540075,
      "MoBERT-min(F/N)": 0.34188130087540075,
      "MoBERT-max(F/N)": 0.42234653428499114,
      "MotionCritic": -0.629591166973114,
      "VeMo (human-opt view)": 0.12572944297082228,
      "VeMo (max entropy view)": 0.1917808219178082,
      "VeMo (min entropy view)": 0.12572944297082228,
      "VeMo (random view)": 0.1917808219178082,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is making rapid swinging motions with their right leg in the air while holding onto something with their right hand."
  },
  "006199": {
    "text": "a person picks something up on the left and sets it down on the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.12053331847805061,
      "Minus Multimodal Distance": -3.1479504108428955,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.927036621258594e-05,
      "MoBERT-F": 0.2951670401629529,
      "MoBERT-N": 0.42721266950990294,
      "MoBERT-min(F/N)": 0.2951670401629529,
      "MoBERT-max(F/N)": 0.42721266950990294,
      "MotionCritic": -2.0542421340942383,
      "VeMo (human-opt view)": 0.7546468401486989,
      "VeMo (max entropy view)": 0.6932849364791288,
      "VeMo (min entropy view)": 0.7546468401486989,
      "VeMo (random view)": 0.7546468401486989,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks something up on the left and sets it down on the right."
  },
  "004157": {
    "text": "a man kicks with his right leg and then kicks with his left leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3744493727095929,
      "Minus Multimodal Distance": -9.63037395477295,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9880487322807312,
      "MoBERT-F": 0.8510063073855944,
      "MoBERT-N": 0.7236113003668482,
      "MoBERT-min(F/N)": 0.7236113003668482,
      "MoBERT-max(F/N)": 0.8510063073855944,
      "MotionCritic": 0.4606951177120209,
      "VeMo (human-opt view)": 0.9688776228700183,
      "VeMo (max entropy view)": 0.9578050301392642,
      "VeMo (min entropy view)": 0.9688776228700183,
      "VeMo (random view)": 0.9578050301392642,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man kicks with his right leg and then with his left leg."
  },
  "007037": {
    "text": "man walks in a circular motion then stops right before completing the circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.849504160375988,
      "Minus Multimodal Distance": -5.532063961029053,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7268233299255371,
      "MoBERT-F": 0.6374618519598412,
      "MoBERT-N": 0.7388905512184007,
      "MoBERT-min(F/N)": 0.6374618519598412,
      "MoBERT-max(F/N)": 0.7388905512184007,
      "MotionCritic": -13.164562225341797,
      "VeMo (human-opt view)": 0.8873321657910099,
      "VeMo (max entropy view)": 0.8873321657910099,
      "VeMo (min entropy view)": 0.8934945308002303,
      "VeMo (random view)": 0.8873321657910099,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks in a circular motion and then stops right before completing the circle."
  },
  "013046": {
    "text": "the sim is standing still before extending their right arm up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.21235341694392135,
      "Minus Multimodal Distance": -9.1740083694458,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4559856430860236e-05,
      "MoBERT-F": 0.33302136177487646,
      "MoBERT-N": 0.433520859799509,
      "MoBERT-min(F/N)": 0.33302136177487646,
      "MoBERT-max(F/N)": 0.433520859799509,
      "MotionCritic": -3.4966347217559814,
      "VeMo (human-opt view)": 0.994797869006581,
      "VeMo (max entropy view)": 0.9669064748201439,
      "VeMo (min entropy view)": 0.994797869006581,
      "VeMo (random view)": 0.9669064748201439,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The Sim is standing still before extending their right arm up."
  },
  "013468": {
    "text": "a person has his legs spread and arms crossed.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8248965220261056,
      "Minus Multimodal Distance": -5.71120023727417,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.6397017538547516e-05,
      "MoBERT-F": 0.3371997878102281,
      "MoBERT-N": 0.49667293556905573,
      "MoBERT-min(F/N)": 0.3371997878102281,
      "MoBERT-max(F/N)": 0.49667293556905573,
      "MotionCritic": -5.055622577667236,
      "VeMo (human-opt view)": 0.0002778900937327697,
      "VeMo (max entropy view)": 0.0002778900937327697,
      "VeMo (min entropy view)": 0.0002776770633439747,
      "VeMo (random view)": 0.0002778900937327697,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person has their legs spread and arms crossed."
  },
  "003721": {
    "text": "the person is walking forward in an odd way.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4624618490977354,
      "Minus Multimodal Distance": -6.806719779968262,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0004864584479946643,
      "MoBERT-F": 0.5410403981352603,
      "MoBERT-N": 0.5153013532267888,
      "MoBERT-min(F/N)": 0.5153013532267888,
      "MoBERT-max(F/N)": 0.5410403981352603,
      "MotionCritic": -1.6482394933700562,
      "VeMo (human-opt view)": 0.8873720136518771,
      "VeMo (max entropy view)": 0.8354948805460751,
      "VeMo (min entropy view)": 0.8873720136518771,
      "VeMo (random view)": 0.8354948805460751,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking forward in an odd way."
  },
  "008310": {
    "text": "person is walking backwards with their toes bent .",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3965381149475732,
      "Minus Multimodal Distance": -11.231605529785156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9966365098953247,
      "MoBERT-F": 0.7132723564160868,
      "MoBERT-N": 0.7049425410755399,
      "MoBERT-min(F/N)": 0.7049425410755399,
      "MoBERT-max(F/N)": 0.7132723564160868,
      "MotionCritic": -12.575387954711914,
      "VeMo (human-opt view)": 0.6796407185628742,
      "VeMo (max entropy view)": 0.6796407185628742,
      "VeMo (min entropy view)": 0.8809411764705882,
      "VeMo (random view)": 0.6796407185628742,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking backwards with their toes bent."
  },
  "007126": {
    "text": "a person runs forward and stops short.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.36396111592817526,
      "Minus Multimodal Distance": -13.364476203918457,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8079944968339987e-05,
      "MoBERT-F": 0.4102762481411888,
      "MoBERT-N": 0.5076277786561759,
      "MoBERT-min(F/N)": 0.4102762481411888,
      "MoBERT-max(F/N)": 0.5076277786561759,
      "MotionCritic": -4.86682653427124,
      "VeMo (human-opt view)": 0.9908640769928168,
      "VeMo (max entropy view)": 0.9723455104131102,
      "VeMo (min entropy view)": 0.9908640769928168,
      "VeMo (random view)": 0.9908640769928168,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs forward and stops short."
  },
  "006521": {
    "text": "moving hands in a random pattern.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6006027249219682,
      "Minus Multimodal Distance": -4.947844982147217,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6074409106513485e-05,
      "MoBERT-F": 0.4100028244188852,
      "MoBERT-N": 0.40409557640329336,
      "MoBERT-min(F/N)": 0.40409557640329336,
      "MoBERT-max(F/N)": 0.4100028244188852,
      "MotionCritic": -9.338852882385254,
      "VeMo (human-opt view)": 0.9796516956920257,
      "VeMo (max entropy view)": 0.9363369245837414,
      "VeMo (min entropy view)": 0.9796516956920257,
      "VeMo (random view)": 0.9363369245837414,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving hands in a random pattern."
  },
  "008105": {
    "text": "a person eagerly practicing karate.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1796293744829633,
      "Minus Multimodal Distance": -6.061366558074951,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.006190064828842878,
      "MoBERT-F": 0.5775285121405669,
      "MoBERT-N": 0.6545174925974842,
      "MoBERT-min(F/N)": 0.5775285121405669,
      "MoBERT-max(F/N)": 0.6545174925974842,
      "MotionCritic": -4.796316146850586,
      "VeMo (human-opt view)": 0.9553183076314749,
      "VeMo (max entropy view)": 0.9553183076314749,
      "VeMo (min entropy view)": 0.9579158316633266,
      "VeMo (random view)": 0.9553183076314749,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is eagerly practicing karate."
  },
  "000704": {
    "text": "walking in a straight line",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5196763104154005,
      "Minus Multimodal Distance": -10.17858600616455,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.920651422755327e-05,
      "MoBERT-F": 0.4262257737182765,
      "MoBERT-N": 0.49082254675501225,
      "MoBERT-min(F/N)": 0.4262257737182765,
      "MoBERT-max(F/N)": 0.49082254675501225,
      "MotionCritic": -3.959824323654175,
      "VeMo (human-opt view)": 0.9796589274707211,
      "VeMo (max entropy view)": 0.9796589274707211,
      "VeMo (min entropy view)": 0.9902956039533116,
      "VeMo (random view)": 0.9902956039533116,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking in a straight line."
  },
  "001676": {
    "text": "a person places left hand on cround and gets into a seated position on the ground. the person uses left hand to brace themself and gets back to a standing position",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6625526832936703,
      "Minus Multimodal Distance": -5.413075923919678,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7832382917404175,
      "MoBERT-F": 0.4516789178567344,
      "MoBERT-N": 0.4642756560114687,
      "MoBERT-min(F/N)": 0.4516789178567344,
      "MoBERT-max(F/N)": 0.4642756560114687,
      "MotionCritic": -8.437209129333496,
      "VeMo (human-opt view)": 0.8808227465214761,
      "VeMo (max entropy view)": 0.8808227465214761,
      "VeMo (min entropy view)": 0.9284750337381916,
      "VeMo (random view)": 0.8808227465214761,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person places their left hand on the ground and gets into a seated position on the ground. The person uses their left hand to brace themselves and gets back to a standing position."
  },
  "011339": {
    "text": "a person who is standing with his hands at his sides quickly runs forward and stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3890875714053229,
      "Minus Multimodal Distance": -8.341739654541016,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7783056793850847e-05,
      "MoBERT-F": 0.495610437473027,
      "MoBERT-N": 0.5148650921798579,
      "MoBERT-min(F/N)": 0.495610437473027,
      "MoBERT-max(F/N)": 0.5148650921798579,
      "MotionCritic": -9.850015640258789,
      "VeMo (human-opt view)": 0.993704537299801,
      "VeMo (max entropy view)": 0.9841308894082913,
      "VeMo (min entropy view)": 0.993704537299801,
      "VeMo (random view)": 0.9841308894082913,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who is standing with their hands at their sides quickly runs forward and then stops."
  },
  "014448": {
    "text": "a person walks forward, hops backwards, then defends themselves by putting their hands up in defense",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1695278955571824,
      "Minus Multimodal Distance": -10.218091011047363,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00011026070569641888,
      "MoBERT-F": 0.47500332921952454,
      "MoBERT-N": 0.5822492038906328,
      "MoBERT-min(F/N)": 0.47500332921952454,
      "MoBERT-max(F/N)": 0.5822492038906328,
      "MotionCritic": -4.296755313873291,
      "VeMo (human-opt view)": 0.8671931083991385,
      "VeMo (max entropy view)": 0.8671931083991385,
      "VeMo (min entropy view)": 0.8806290207290922,
      "VeMo (random view)": 0.8671931083991385,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, hops backwards, then defends themselves by putting their hands up in a defensive position."
  },
  "005296": {
    "text": "person swings arms up and down at the joints while bouncing around, as if dancing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9717031946936625,
      "Minus Multimodal Distance": -5.231542587280273,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9896799921989441,
      "MoBERT-F": 0.7885969623224305,
      "MoBERT-N": 0.726029398899817,
      "MoBERT-min(F/N)": 0.726029398899817,
      "MoBERT-max(F/N)": 0.7885969623224305,
      "MotionCritic": -9.292062759399414,
      "VeMo (human-opt view)": 0.9580838323353293,
      "VeMo (max entropy view)": 0.9580838323353293,
      "VeMo (min entropy view)": 0.960256068284876,
      "VeMo (random view)": 0.960256068284876,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person swings their arms up and down at the joints while bouncing around, as if dancing."
  },
  "012277": {
    "text": "person walks to pick something up then walks back to wipe something with it.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9897990549807553,
      "Minus Multimodal Distance": -6.582326412200928,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9863601326942444,
      "MoBERT-F": 0.6598325472875824,
      "MoBERT-N": 0.6477506885200561,
      "MoBERT-min(F/N)": 0.6477506885200561,
      "MoBERT-max(F/N)": 0.6598325472875824,
      "MotionCritic": -8.049948692321777,
      "VeMo (human-opt view)": 0.880466472303207,
      "VeMo (max entropy view)": 0.880466472303207,
      "VeMo (min entropy view)": 0.8871181938911022,
      "VeMo (random view)": 0.8871181938911022,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks to pick something up, then walks back to wipe something with it."
  },
  "003831": {
    "text": "a person swinging golf club or swinging and hitting ball with a bat.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8248061002105916,
      "Minus Multimodal Distance": -8.609353065490723,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.02517842687666416,
      "MoBERT-F": 0.7188770202466772,
      "MoBERT-N": 0.6329950393955228,
      "MoBERT-min(F/N)": 0.6329950393955228,
      "MoBERT-max(F/N)": 0.7188770202466772,
      "MotionCritic": -10.139886856079102,
      "VeMo (human-opt view)": 0.23397435897435898,
      "VeMo (max entropy view)": 0.23397435897435898,
      "VeMo (min entropy view)": 0.01098901098901099,
      "VeMo (random view)": 0.23397435897435898,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is swinging a golf club or swinging and hitting a ball with a bat."
  },
  "008633": {
    "text": "a man confidently walks down a ramp with a measured pace.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.32188968923140043,
      "Minus Multimodal Distance": -5.246433734893799,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.484698027023114e-05,
      "MoBERT-F": 0.36453219662279523,
      "MoBERT-N": 0.4543899595547315,
      "MoBERT-min(F/N)": 0.36453219662279523,
      "MoBERT-max(F/N)": 0.4543899595547315,
      "MotionCritic": -1.2546817064285278,
      "VeMo (human-opt view)": 0.7306122448979592,
      "VeMo (max entropy view)": 0.7306122448979592,
      "VeMo (min entropy view)": 0.7543859649122807,
      "VeMo (random view)": 0.7306122448979592,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man confidently walks down a ramp at a measured pace."
  },
  "014392": {
    "text": "a person is making signals with both his hands",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.254397462138253,
      "Minus Multimodal Distance": -9.74519157409668,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.733641617349349e-05,
      "MoBERT-F": 0.32084333190508646,
      "MoBERT-N": 0.41074284017440793,
      "MoBERT-min(F/N)": 0.32084333190508646,
      "MoBERT-max(F/N)": 0.41074284017440793,
      "MotionCritic": -11.892784118652344,
      "VeMo (human-opt view)": 0.9830890642615558,
      "VeMo (max entropy view)": 0.7302158273381295,
      "VeMo (min entropy view)": 0.9830890642615558,
      "VeMo (random view)": 0.7302158273381295,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is making signals with both his hands."
  },
  "010651": {
    "text": "the person ran forward to kick a soccer ball.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9107526831274539,
      "Minus Multimodal Distance": -7.627857208251953,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.2486717700958252,
      "MoBERT-F": 0.7203633404823467,
      "MoBERT-N": 0.6282569598497579,
      "MoBERT-min(F/N)": 0.6282569598497579,
      "MoBERT-max(F/N)": 0.7203633404823467,
      "MotionCritic": -4.879650592803955,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.4691588785046729,
      "VeMo (random view)": 0.5,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person ran forward to kick a soccer ball."
  },
  "010315": {
    "text": "the person is walking forward with the cake.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4560416741740879,
      "Minus Multimodal Distance": -8.90812873840332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0007774134282954037,
      "MoBERT-F": 0.5062478115785987,
      "MoBERT-N": 0.5139116798334603,
      "MoBERT-min(F/N)": 0.5062478115785987,
      "MoBERT-max(F/N)": 0.5139116798334603,
      "MotionCritic": -0.8916736245155334,
      "VeMo (human-opt view)": 0.7542372881355932,
      "VeMo (max entropy view)": 0.5621468926553672,
      "VeMo (min entropy view)": 0.7542372881355932,
      "VeMo (random view)": 0.5621468926553672,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking forward with the cake."
  },
  "005537": {
    "text": "a figure walks upstairs without a handrail.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4135316127508794,
      "Minus Multimodal Distance": -4.202600002288818,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.1809038773644716e-05,
      "MoBERT-F": 0.41337886203315843,
      "MoBERT-N": 0.4673584162626723,
      "MoBERT-min(F/N)": 0.41337886203315843,
      "MoBERT-max(F/N)": 0.4673584162626723,
      "MotionCritic": -4.559309482574463,
      "VeMo (human-opt view)": 0.07598784194528875,
      "VeMo (max entropy view)": 0.2680851063829787,
      "VeMo (min entropy view)": 0.07598784194528875,
      "VeMo (random view)": 0.2680851063829787,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks upstairs without a handrail."
  },
  "005180": {
    "text": "a man pats himself on the head",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.223698628123732,
      "Minus Multimodal Distance": -8.763959884643555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.4484102798160166e-05,
      "MoBERT-F": 0.36872531499458616,
      "MoBERT-N": 0.4826346741955409,
      "MoBERT-min(F/N)": 0.36872531499458616,
      "MoBERT-max(F/N)": 0.4826346741955409,
      "MotionCritic": -1.4183012247085571,
      "VeMo (human-opt view)": 0.8667992047713717,
      "VeMo (max entropy view)": 0.8667992047713717,
      "VeMo (min entropy view)": 0.9526047786917352,
      "VeMo (random view)": 0.9526047786917352,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man pats himself on the head."
  },
  "006246": {
    "text": "a person takes a step forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1507498792021386,
      "Minus Multimodal Distance": -7.446928024291992,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.2379001165973023e-05,
      "MoBERT-F": 0.40369154011224223,
      "MoBERT-N": 0.4936209319444891,
      "MoBERT-min(F/N)": 0.40369154011224223,
      "MoBERT-max(F/N)": 0.4936209319444891,
      "MotionCritic": -5.708090305328369,
      "VeMo (human-opt view)": 0.9809725158562368,
      "VeMo (max entropy view)": 0.9431737797689225,
      "VeMo (min entropy view)": 0.9809725158562368,
      "VeMo (random view)": 0.9809725158562368,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes a step forward."
  },
  "006705": {
    "text": "a person walks straight slowly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.27283087044953125,
      "Minus Multimodal Distance": -6.517505645751953,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.442621841211803e-05,
      "MoBERT-F": 0.3155091854671695,
      "MoBERT-N": 0.4763431852779579,
      "MoBERT-min(F/N)": 0.3155091854671695,
      "MoBERT-max(F/N)": 0.4763431852779579,
      "MotionCritic": -4.368819713592529,
      "VeMo (human-opt view)": 0.982048417132216,
      "VeMo (max entropy view)": 0.9754508435136707,
      "VeMo (min entropy view)": 0.982048417132216,
      "VeMo (random view)": 0.982048417132216,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks straight and slowly."
  },
  "009671": {
    "text": "a figure sprints forward confidently",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5754273977551463,
      "Minus Multimodal Distance": -8.881738662719727,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0009111827239394188,
      "MoBERT-F": 0.7021931289486325,
      "MoBERT-N": 0.6464001920141648,
      "MoBERT-min(F/N)": 0.6464001920141648,
      "MoBERT-max(F/N)": 0.7021931289486325,
      "MotionCritic": -10.220527648925781,
      "VeMo (human-opt view)": 0.8990694345025053,
      "VeMo (max entropy view)": 0.8522727272727273,
      "VeMo (min entropy view)": 0.8990694345025053,
      "VeMo (random view)": 0.8522727272727273,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure sprints forward confidently."
  },
  "011566": {
    "text": "spinning arms near chest.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4905654017261697,
      "Minus Multimodal Distance": -7.669926166534424,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.118391370866448e-05,
      "MoBERT-F": 0.346226351067262,
      "MoBERT-N": 0.4042056273517472,
      "MoBERT-min(F/N)": 0.346226351067262,
      "MoBERT-max(F/N)": 0.4042056273517472,
      "MotionCritic": -6.853429317474365,
      "VeMo (human-opt view)": 0.09557109557109557,
      "VeMo (max entropy view)": 0.11950873878129428,
      "VeMo (min entropy view)": 0.09557109557109557,
      "VeMo (random view)": 0.09557109557109557,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is spinning their arms near the chest."
  },
  "003082": {
    "text": "standing forward, hands are at the side moving toward sides of the head and back down while the legs are slightly jumping outwards. creating a jumping jack.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8555548785543184,
      "Minus Multimodal Distance": -7.526988506317139,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.02291412092745304,
      "MoBERT-F": 0.720490030821619,
      "MoBERT-N": 0.6329906246109315,
      "MoBERT-min(F/N)": 0.6329906246109315,
      "MoBERT-max(F/N)": 0.720490030821619,
      "MotionCritic": -6.99877405166626,
      "VeMo (human-opt view)": 0.844,
      "VeMo (max entropy view)": 0.8076923076923077,
      "VeMo (min entropy view)": 0.844,
      "VeMo (random view)": 0.844,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing forward, hands are at the sides, moving toward the sides of the head and back down while the legs are slightly jumping outwards, creating a jumping jack."
  },
  "009031": {
    "text": "a person takes small steps while holding something in both of their arms angled down towards the ground",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.003005636739503,
      "Minus Multimodal Distance": -10.65489387512207,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.140476034488529e-05,
      "MoBERT-F": 0.24480780686854414,
      "MoBERT-N": 0.4187012414864755,
      "MoBERT-min(F/N)": 0.24480780686854414,
      "MoBERT-max(F/N)": 0.4187012414864755,
      "MotionCritic": -0.7712475657463074,
      "VeMo (human-opt view)": 0.0008544511391763091,
      "VeMo (max entropy view)": 0.0008544511391763091,
      "VeMo (min entropy view)": 0.00016836984900386073,
      "VeMo (random view)": 0.00016836984900386073,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person takes small steps while holding something in both arms, with the arms angled down towards the ground."
  },
  "009554": {
    "text": "a person stands still for a second and then takes a step back with their right foot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.14996213351637291,
      "Minus Multimodal Distance": -10.529420852661133,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.307708746229764e-05,
      "MoBERT-F": 0.36259685994570123,
      "MoBERT-N": 0.4739228995045115,
      "MoBERT-min(F/N)": 0.36259685994570123,
      "MoBERT-max(F/N)": 0.4739228995045115,
      "MotionCritic": -3.2344183921813965,
      "VeMo (human-opt view)": 0.9197149643705463,
      "VeMo (max entropy view)": 0.836036036036036,
      "VeMo (min entropy view)": 0.9197149643705463,
      "VeMo (random view)": 0.9197149643705463,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still for a second and then takes a step back with their right foot."
  },
  "011687": {
    "text": "a person walks forward a few steps and then takes a drink with his right hand",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.49723798032220967,
      "Minus Multimodal Distance": -10.54037857055664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9783621357637458e-05,
      "MoBERT-F": 0.2910027502686292,
      "MoBERT-N": 0.45162959517344603,
      "MoBERT-min(F/N)": 0.2910027502686292,
      "MoBERT-max(F/N)": 0.45162959517344603,
      "MotionCritic": -2.918454647064209,
      "VeMo (human-opt view)": 0.021649314162948272,
      "VeMo (max entropy view)": 0.05364511691884457,
      "VeMo (min entropy view)": 0.021649314162948272,
      "VeMo (random view)": 0.05364511691884457,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward a few steps and then takes a drink with his right hand."
  },
  "004811": {
    "text": "person walks forward then turns to their right and carry on walking then stop",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5419239801111376,
      "Minus Multimodal Distance": -5.16622257232666,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9043363610981032e-05,
      "MoBERT-F": 0.3381865357110282,
      "MoBERT-N": 0.5279609999126682,
      "MoBERT-min(F/N)": 0.3381865357110282,
      "MoBERT-max(F/N)": 0.5279609999126682,
      "MotionCritic": -1.872415542602539,
      "VeMo (human-opt view)": 0.97833890332962,
      "VeMo (max entropy view)": 0.977008652657602,
      "VeMo (min entropy view)": 0.97833890332962,
      "VeMo (random view)": 0.97833890332962,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, then turns to their right and carries on walking, then stops."
  },
  "001567": {
    "text": "a person throws their hands outwards then back in front of them.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5192916082366327,
      "Minus Multimodal Distance": -6.976701736450195,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0019118365598842502,
      "MoBERT-F": 0.5281910147877229,
      "MoBERT-N": 0.5134965312654977,
      "MoBERT-min(F/N)": 0.5134965312654977,
      "MoBERT-max(F/N)": 0.5281910147877229,
      "MotionCritic": -6.050824165344238,
      "VeMo (human-opt view)": 0.978296382730455,
      "VeMo (max entropy view)": 0.9049773755656109,
      "VeMo (min entropy view)": 0.978296382730455,
      "VeMo (random view)": 0.9049773755656109,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws their hands outwards and then back in front of them."
  },
  "000815": {
    "text": "the man walks back and forth putting something on a shelf.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8534626796515948,
      "Minus Multimodal Distance": -2.928347587585449,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0344948754645884e-05,
      "MoBERT-F": 0.35180429775227307,
      "MoBERT-N": 0.46266542598515703,
      "MoBERT-min(F/N)": 0.35180429775227307,
      "MoBERT-max(F/N)": 0.46266542598515703,
      "MotionCritic": -21.86079978942871,
      "VeMo (human-opt view)": 0.00010869313433580477,
      "VeMo (max entropy view)": 0.00026139838688850884,
      "VeMo (min entropy view)": 0.00010869313433580477,
      "VeMo (random view)": 0.00026139838688850884,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man walks back and forth, putting something on a shelf."
  },
  "009566": {
    "text": "a man swings a golf club and hits the ball",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7610110853974319,
      "Minus Multimodal Distance": -2.9413199424743652,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.07135099172592163,
      "MoBERT-F": 0.7303519045183045,
      "MoBERT-N": 0.6110319148651041,
      "MoBERT-min(F/N)": 0.6110319148651041,
      "MoBERT-max(F/N)": 0.7303519045183045,
      "MotionCritic": -9.10084342956543,
      "VeMo (human-opt view)": 0.000668154153286711,
      "VeMo (max entropy view)": 0.000668154153286711,
      "VeMo (min entropy view)": 0.0004300187488174484,
      "VeMo (random view)": 0.000668154153286711,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man swings a golf club and hits the ball."
  },
  "013638": {
    "text": "a person grabbed the leg and did something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6836593580676259,
      "Minus Multimodal Distance": -3.649575710296631,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0013376130955293775,
      "MoBERT-F": 0.46433754391725224,
      "MoBERT-N": 0.49386351655797844,
      "MoBERT-min(F/N)": 0.46433754391725224,
      "MoBERT-max(F/N)": 0.49386351655797844,
      "MotionCritic": -5.173165798187256,
      "VeMo (human-opt view)": 0.6361556064073226,
      "VeMo (max entropy view)": 0.5922619047619048,
      "VeMo (min entropy view)": 0.6361556064073226,
      "VeMo (random view)": 0.5922619047619048,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person grabbed the leg and did something."
  },
  "012280": {
    "text": "a person walks forward quickly and then stops",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4997154197865928,
      "Minus Multimodal Distance": -3.6845178604125977,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5145127438008785e-05,
      "MoBERT-F": 0.4412457632079112,
      "MoBERT-N": 0.5931542327164109,
      "MoBERT-min(F/N)": 0.4412457632079112,
      "MoBERT-max(F/N)": 0.5931542327164109,
      "MotionCritic": -1.3502475023269653,
      "VeMo (human-opt view)": 0.9433419614610297,
      "VeMo (max entropy view)": 0.9400868306801736,
      "VeMo (min entropy view)": 0.9433419614610297,
      "VeMo (random view)": 0.9433419614610297,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward quickly and then stops."
  },
  "013404": {
    "text": "a figure boxes right to punch twice.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9047954464402428,
      "Minus Multimodal Distance": -6.209743976593018,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5972038201871328e-05,
      "MoBERT-F": 0.3167390502759123,
      "MoBERT-N": 0.41939292549117324,
      "MoBERT-min(F/N)": 0.3167390502759123,
      "MoBERT-max(F/N)": 0.41939292549117324,
      "MotionCritic": -7.1565165519714355,
      "VeMo (human-opt view)": 0.6782178217821783,
      "VeMo (max entropy view)": 0.6782178217821783,
      "VeMo (min entropy view)": 0.7661290322580645,
      "VeMo (random view)": 0.7661290322580645,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure boxes right to punch twice."
  },
  "009924": {
    "text": "the  person is running over a vault.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.7437351026951882,
      "Minus Multimodal Distance": -4.601840019226074,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7574395537376404,
      "MoBERT-F": 0.8309919370445411,
      "MoBERT-N": 0.7203572508435389,
      "MoBERT-min(F/N)": 0.7203572508435389,
      "MoBERT-max(F/N)": 0.8309919370445411,
      "MotionCritic": -7.410198211669922,
      "VeMo (human-opt view)": 0.027637100274924033,
      "VeMo (max entropy view)": 0.027637100274924033,
      "VeMo (min entropy view)": 0.02040816326530612,
      "VeMo (random view)": 0.027637100274924033,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is running over a vault."
  },
  "013768": {
    "text": "moving the hands and work some thing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3339887825460911,
      "Minus Multimodal Distance": -9.6382474899292,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.3407118937466294e-05,
      "MoBERT-F": 0.31244480986237333,
      "MoBERT-N": 0.3323583163835091,
      "MoBERT-min(F/N)": 0.31244480986237333,
      "MoBERT-max(F/N)": 0.3323583163835091,
      "MotionCritic": -9.65367317199707,
      "VeMo (human-opt view)": 0.9740326741186587,
      "VeMo (max entropy view)": 0.9101608473911338,
      "VeMo (min entropy view)": 0.9740326741186587,
      "VeMo (random view)": 0.9101608473911338,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving the hands and working something."
  },
  "013000": {
    "text": "a person gets pushed to their right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3885498818354534,
      "Minus Multimodal Distance": -4.727367877960205,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2270638510235585e-05,
      "MoBERT-F": 0.35207011905790875,
      "MoBERT-N": 0.4708539516249282,
      "MoBERT-min(F/N)": 0.35207011905790875,
      "MoBERT-max(F/N)": 0.4708539516249282,
      "MotionCritic": -7.122429370880127,
      "VeMo (human-opt view)": 0.9146757679180887,
      "VeMo (max entropy view)": 0.8743169398907104,
      "VeMo (min entropy view)": 0.9146757679180887,
      "VeMo (random view)": 0.9146757679180887,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person gets pushed to their right."
  },
  "004068": {
    "text": "a person takes a step backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1894019990371284,
      "Minus Multimodal Distance": -2.6194231510162354,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.11345883458852768,
      "MoBERT-F": 0.6748921785854768,
      "MoBERT-N": 0.6591503499666743,
      "MoBERT-min(F/N)": 0.6591503499666743,
      "MoBERT-max(F/N)": 0.6748921785854768,
      "MotionCritic": -0.7621961236000061,
      "VeMo (human-opt view)": 0.9098901098901099,
      "VeMo (max entropy view)": 0.8359046283309958,
      "VeMo (min entropy view)": 0.9098901098901099,
      "VeMo (random view)": 0.9098901098901099,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person takes a step backwards."
  },
  "001113": {
    "text": "this person is cleaning a table.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.46434121315867183,
      "Minus Multimodal Distance": -13.088074684143066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.681985617731698e-05,
      "MoBERT-F": 0.40401933764985465,
      "MoBERT-N": 0.48578497137666055,
      "MoBERT-min(F/N)": 0.40401933764985465,
      "MoBERT-max(F/N)": 0.48578497137666055,
      "MotionCritic": -4.594217777252197,
      "VeMo (human-opt view)": 3.028009084027252e-05,
      "VeMo (max entropy view)": 0.00035742008826859095,
      "VeMo (min entropy view)": 3.028009084027252e-05,
      "VeMo (random view)": 3.028009084027252e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person is cleaning a table."
  },
  "008232": {
    "text": "a man pets a large dog.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6228035695366906,
      "Minus Multimodal Distance": -10.251508712768555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.000744050950743258,
      "MoBERT-F": 0.5593774894730981,
      "MoBERT-N": 0.47035583272257464,
      "MoBERT-min(F/N)": 0.47035583272257464,
      "MoBERT-max(F/N)": 0.5593774894730981,
      "MotionCritic": -12.300070762634277,
      "VeMo (human-opt view)": 4.2848188033734274e-05,
      "VeMo (max entropy view)": 8.752337702090295e-05,
      "VeMo (min entropy view)": 4.2848188033734274e-05,
      "VeMo (random view)": 8.752337702090295e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man pets a large dog."
  },
  "003800": {
    "text": "a person does the cat walk",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0246413136112194,
      "Minus Multimodal Distance": -8.081754684448242,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00020467817375902086,
      "MoBERT-F": 0.47023239180309084,
      "MoBERT-N": 0.5760428162813288,
      "MoBERT-min(F/N)": 0.47023239180309084,
      "MoBERT-max(F/N)": 0.5760428162813288,
      "MotionCritic": -1.763884425163269,
      "VeMo (human-opt view)": 0.5151515151515151,
      "VeMo (max entropy view)": 0.5151515151515151,
      "VeMo (min entropy view)": 0.5160142348754448,
      "VeMo (random view)": 0.5160142348754448,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does the catwalk."
  },
  "005937": {
    "text": "a person walks diagonally forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.26557097039987115,
      "Minus Multimodal Distance": -8.50983715057373,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00019490657723508775,
      "MoBERT-F": 0.5373087361059584,
      "MoBERT-N": 0.5895110330923113,
      "MoBERT-min(F/N)": 0.5373087361059584,
      "MoBERT-max(F/N)": 0.5895110330923113,
      "MotionCritic": -0.25938180088996887,
      "VeMo (human-opt view)": 0.9284750337381916,
      "VeMo (max entropy view)": 0.9284750337381916,
      "VeMo (min entropy view)": 0.9688206055128784,
      "VeMo (random view)": 0.9284750337381916,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks diagonally forward."
  },
  "012167": {
    "text": "the right hand flings forward to the left side, the left hand meets the right hand, and is drag back right the shoulder.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.703420585390631,
      "Minus Multimodal Distance": -5.322139263153076,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00019614581833593547,
      "MoBERT-F": 0.47370287239125497,
      "MoBERT-N": 0.45916389058095697,
      "MoBERT-min(F/N)": 0.45916389058095697,
      "MoBERT-max(F/N)": 0.47370287239125497,
      "MotionCritic": -6.122094631195068,
      "VeMo (human-opt view)": 0.8671513549239921,
      "VeMo (max entropy view)": 0.8433734939759037,
      "VeMo (min entropy view)": 0.8671513549239921,
      "VeMo (random view)": 0.8671513549239921,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The right hand flings forward to the left side. The left hand meets the right hand and is dragged back to the right shoulder."
  },
  "006383": {
    "text": "subject walks in a full circle, then side steps to turn around and walk around something to avoid running into.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9208498165827607,
      "Minus Multimodal Distance": -10.747038841247559,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9671683311462402,
      "MoBERT-F": 0.6906561807818186,
      "MoBERT-N": 0.7655672943780716,
      "MoBERT-min(F/N)": 0.6906561807818186,
      "MoBERT-max(F/N)": 0.7655672943780716,
      "MotionCritic": -11.00929069519043,
      "VeMo (human-opt view)": 0.23433874709976799,
      "VeMo (max entropy view)": 0.23433874709976799,
      "VeMo (min entropy view)": 0.2020330368487929,
      "VeMo (random view)": 0.2020330368487929,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The subject walks in a full circle. Then, it side - steps to turn around and walks around something to avoid running into it."
  },
  "008620": {
    "text": "the person kicks a ball with their left foot, their arms raised to a t-pose as they do so.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6474302432204883,
      "Minus Multimodal Distance": -7.248292922973633,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.16567499935626984,
      "MoBERT-F": 0.7484526763387724,
      "MoBERT-N": 0.6002473630007303,
      "MoBERT-min(F/N)": 0.6002473630007303,
      "MoBERT-max(F/N)": 0.7484526763387724,
      "MotionCritic": -8.587302207946777,
      "VeMo (human-opt view)": 0.18244803695150116,
      "VeMo (max entropy view)": 0.20169851380042464,
      "VeMo (min entropy view)": 0.18244803695150116,
      "VeMo (random view)": 0.18244803695150116,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person kicks a ball with their left foot, and their arms are raised to a T - pose as they do so."
  },
  "008904": {
    "text": "a person bends over and then picks up something with his left hand and then the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6576054642471506,
      "Minus Multimodal Distance": -3.1522912979125977,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00010178394586546347,
      "MoBERT-F": 0.3350663287171899,
      "MoBERT-N": 0.41917777668275713,
      "MoBERT-min(F/N)": 0.3350663287171899,
      "MoBERT-max(F/N)": 0.41917777668275713,
      "MotionCritic": -8.832168579101562,
      "VeMo (human-opt view)": 0.5146198830409356,
      "VeMo (max entropy view)": 0.5146198830409356,
      "VeMo (min entropy view)": 0.5308219178082192,
      "VeMo (random view)": 0.5308219178082192,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends over, picks up something with their left hand, and then with their right."
  },
  "011757": {
    "text": "a person raises both hands and claps their hands multiple times before returning to their original position.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.17027387368243002,
      "Minus Multimodal Distance": -7.845926761627197,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5083287255256437e-05,
      "MoBERT-F": 0.3552958263037608,
      "MoBERT-N": 0.4909143364242928,
      "MoBERT-min(F/N)": 0.3552958263037608,
      "MoBERT-max(F/N)": 0.4909143364242928,
      "MotionCritic": -0.817494809627533,
      "VeMo (human-opt view)": 0.9890404478435701,
      "VeMo (max entropy view)": 0.9526488513830286,
      "VeMo (min entropy view)": 0.9890404478435701,
      "VeMo (random view)": 0.9890404478435701,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises both hands and claps them multiple times before returning to their original position."
  },
  "003227": {
    "text": "a person walks back and forth from right to left, stops, and stands in one place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7522521015005563,
      "Minus Multimodal Distance": -2.793959140777588,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.497900929185562e-05,
      "MoBERT-F": 0.42661321655642337,
      "MoBERT-N": 0.5819763390107765,
      "MoBERT-min(F/N)": 0.42661321655642337,
      "MoBERT-max(F/N)": 0.5819763390107765,
      "MotionCritic": -11.921018600463867,
      "VeMo (human-opt view)": 0.9602553870710295,
      "VeMo (max entropy view)": 0.8869690424766019,
      "VeMo (min entropy view)": 0.9602553870710295,
      "VeMo (random view)": 0.9602553870710295,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks back and forth from right to left, stops, and stands in one place."
  },
  "005933": {
    "text": "the person stands still in a slight squat and then turns to their left and walks.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6215901430452312,
      "Minus Multimodal Distance": -5.748630523681641,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.8288693758659065e-05,
      "MoBERT-F": 0.36820296933607793,
      "MoBERT-N": 0.43653373060519085,
      "MoBERT-min(F/N)": 0.36820296933607793,
      "MoBERT-max(F/N)": 0.43653373060519085,
      "MotionCritic": -9.332294464111328,
      "VeMo (human-opt view)": 0.9890129522108084,
      "VeMo (max entropy view)": 0.9841140529531568,
      "VeMo (min entropy view)": 0.9890129522108084,
      "VeMo (random view)": 0.9841140529531568,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person stands still in a slight squat, then turns to their left and walks."
  },
  "004015": {
    "text": "person walks forward with left hand extended to side, trying to feel something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3958139970064965,
      "Minus Multimodal Distance": -8.1690673828125,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.523764305806253e-05,
      "MoBERT-F": 0.39342129952360616,
      "MoBERT-N": 0.49604719240794826,
      "MoBERT-min(F/N)": 0.39342129952360616,
      "MoBERT-max(F/N)": 0.49604719240794826,
      "MotionCritic": -14.032124519348145,
      "VeMo (human-opt view)": 0.9101796407185628,
      "VeMo (max entropy view)": 0.9101796407185628,
      "VeMo (min entropy view)": 0.9689488910318226,
      "VeMo (random view)": 0.9101796407185628,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward with their left hand extended to the side, trying to feel something."
  },
  "006204": {
    "text": "a person walks forward with left foot, then trips and continues walking forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.40669269084161724,
      "Minus Multimodal Distance": -6.3583855628967285,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.243386163376272e-05,
      "MoBERT-F": 0.47026234433080655,
      "MoBERT-N": 0.5731206941956659,
      "MoBERT-min(F/N)": 0.47026234433080655,
      "MoBERT-max(F/N)": 0.5731206941956659,
      "MotionCritic": 0.3759191930294037,
      "VeMo (human-opt view)": 0.6789772727272727,
      "VeMo (max entropy view)": 0.6374501992031872,
      "VeMo (min entropy view)": 0.6789772727272727,
      "VeMo (random view)": 0.6374501992031872,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward with the left foot, then trips and continues walking forward."
  },
  "005356": {
    "text": "a person repeatedly blocks their face with their left arm.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6179180829264016,
      "Minus Multimodal Distance": -8.229453086853027,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7028672775486484e-05,
      "MoBERT-F": 0.3307052994771468,
      "MoBERT-N": 0.4124276975602072,
      "MoBERT-min(F/N)": 0.3307052994771468,
      "MoBERT-max(F/N)": 0.4124276975602072,
      "MotionCritic": -7.603506088256836,
      "VeMo (human-opt view)": 0.11335012594458438,
      "VeMo (max entropy view)": 0.5478547854785478,
      "VeMo (min entropy view)": 0.11335012594458438,
      "VeMo (random view)": 0.11335012594458438,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person repeatedly blocks their face with their left arm."
  },
  "007134": {
    "text": "the figure is walking in a counter clockwise motion slowly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8412163445276432,
      "Minus Multimodal Distance": -8.466519355773926,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00044037323095835745,
      "MoBERT-F": 0.5226883764905156,
      "MoBERT-N": 0.5701447687950996,
      "MoBERT-min(F/N)": 0.5226883764905156,
      "MoBERT-max(F/N)": 0.5701447687950996,
      "MotionCritic": -11.649072647094727,
      "VeMo (human-opt view)": 0.899165061014772,
      "VeMo (max entropy view)": 0.8990578734858681,
      "VeMo (min entropy view)": 0.899165061014772,
      "VeMo (random view)": 0.8990578734858681,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure is walking in a counter - clockwise motion slowly."
  },
  "011435": {
    "text": "a person has his right hand in the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4264357478548203,
      "Minus Multimodal Distance": -7.855719089508057,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.1684761779615656e-05,
      "MoBERT-F": 0.38728102782034146,
      "MoBERT-N": 0.4197559330860697,
      "MoBERT-min(F/N)": 0.38728102782034146,
      "MoBERT-max(F/N)": 0.4197559330860697,
      "MotionCritic": -5.337499141693115,
      "VeMo (human-opt view)": 0.8809523809523809,
      "VeMo (max entropy view)": 0.8664451827242525,
      "VeMo (min entropy view)": 0.8809523809523809,
      "VeMo (random view)": 0.8664451827242525,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person has his right hand in the air."
  },
  "002647": {
    "text": "person walks on hands and knees then stands up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8079105440873005,
      "Minus Multimodal Distance": -7.315499782562256,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.434910831856541e-05,
      "MoBERT-F": 0.4642490645473021,
      "MoBERT-N": 0.4701888769190817,
      "MoBERT-min(F/N)": 0.4642490645473021,
      "MoBERT-max(F/N)": 0.4701888769190817,
      "MotionCritic": -4.289488792419434,
      "VeMo (human-opt view)": 0.9979439274551051,
      "VeMo (max entropy view)": 0.9953917050691244,
      "VeMo (min entropy view)": 0.9979439274551051,
      "VeMo (random view)": 0.9953917050691244,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks on their hands and knees and then stands up."
  },
  "006351": {
    "text": "the person holds something up to their face.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3061577764085605,
      "Minus Multimodal Distance": -5.423604965209961,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.544017090462148e-05,
      "MoBERT-F": 0.3175708102033739,
      "MoBERT-N": 0.39536152290420284,
      "MoBERT-min(F/N)": 0.3175708102033739,
      "MoBERT-max(F/N)": 0.39536152290420284,
      "MotionCritic": -9.960196495056152,
      "VeMo (human-opt view)": 0.9782953943885654,
      "VeMo (max entropy view)": 0.8270944741532977,
      "VeMo (min entropy view)": 0.9782953943885654,
      "VeMo (random view)": 0.8270944741532977,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person holds something up to their face."
  },
  "007097": {
    "text": "person was walking down the path around things.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5341247945473151,
      "Minus Multimodal Distance": -7.374371528625488,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.447127892286517e-05,
      "MoBERT-F": 0.45068464758737614,
      "MoBERT-N": 0.5987603039412099,
      "MoBERT-min(F/N)": 0.45068464758737614,
      "MoBERT-max(F/N)": 0.5987603039412099,
      "MotionCritic": -1.9114195108413696,
      "VeMo (human-opt view)": 0.7881481481481482,
      "VeMo (max entropy view)": 0.7658862876254181,
      "VeMo (min entropy view)": 0.7881481481481482,
      "VeMo (random view)": 0.7881481481481482,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person was walking down the path around some things."
  },
  "012789": {
    "text": "person is throwing and catching a football.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6127248783956605,
      "Minus Multimodal Distance": -5.8893818855285645,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9477648735046387,
      "MoBERT-F": 0.709644818988698,
      "MoBERT-N": 0.6319539813185375,
      "MoBERT-min(F/N)": 0.6319539813185375,
      "MoBERT-max(F/N)": 0.709644818988698,
      "MotionCritic": -3.271728038787842,
      "VeMo (human-opt view)": 1.5258556235409006e-05,
      "VeMo (max entropy view)": 0.0006268877870861116,
      "VeMo (min entropy view)": 1.5258556235409006e-05,
      "VeMo (random view)": 0.0006268877870861116,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is throwing and catching a football."
  },
  "007027": {
    "text": "a person appears to scratch their head.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.15170467131395832,
      "Minus Multimodal Distance": -3.1942312717437744,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.371747465687804e-05,
      "MoBERT-F": 0.31161199997372224,
      "MoBERT-N": 0.3819324538492722,
      "MoBERT-min(F/N)": 0.31161199997372224,
      "MoBERT-max(F/N)": 0.3819324538492722,
      "MotionCritic": -6.995285511016846,
      "VeMo (human-opt view)": 0.955329500221141,
      "VeMo (max entropy view)": 0.955329500221141,
      "VeMo (min entropy view)": 0.9984985244119221,
      "VeMo (random view)": 0.955329500221141,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person appears to scratch their head."
  },
  "011419": {
    "text": "a figure winds up for the pitch.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9321295685514801,
      "Minus Multimodal Distance": -3.192640542984009,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.06912099570035934,
      "MoBERT-F": 0.6600364444319886,
      "MoBERT-N": 0.6371368643585882,
      "MoBERT-min(F/N)": 0.6371368643585882,
      "MoBERT-max(F/N)": 0.6600364444319886,
      "MotionCritic": -5.833804607391357,
      "VeMo (human-opt view)": 0.3929236499068901,
      "VeMo (max entropy view)": 0.3929236499068901,
      "VeMo (min entropy view)": 0.30638852672750977,
      "VeMo (random view)": 0.3929236499068901,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A figure winds up for the pitch."
  },
  "003432": {
    "text": "a person sits on a ledge",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1189909206481692,
      "Minus Multimodal Distance": -6.719447135925293,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4205604606540874e-05,
      "MoBERT-F": 0.3851809507403168,
      "MoBERT-N": 0.46953872873446073,
      "MoBERT-min(F/N)": 0.3851809507403168,
      "MoBERT-max(F/N)": 0.46953872873446073,
      "MotionCritic": -5.032048225402832,
      "VeMo (human-opt view)": 0.020261143628995948,
      "VeMo (max entropy view)": 0.07560627674750357,
      "VeMo (min entropy view)": 0.020261143628995948,
      "VeMo (random view)": 0.07560627674750357,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits on a ledge."
  },
  "013471": {
    "text": "a person kicked with right leg",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6713861822221875,
      "Minus Multimodal Distance": -2.4978113174438477,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9411952495574951,
      "MoBERT-F": 0.7140921433295093,
      "MoBERT-N": 0.606794805199959,
      "MoBERT-min(F/N)": 0.606794805199959,
      "MoBERT-max(F/N)": 0.7140921433295093,
      "MotionCritic": -8.062207221984863,
      "VeMo (human-opt view)": 0.9830735963015436,
      "VeMo (max entropy view)": 0.9770104497955475,
      "VeMo (min entropy view)": 0.9830735963015436,
      "VeMo (random view)": 0.9830735963015436,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person kicked with the right leg."
  },
  "010392": {
    "text": "a person sits their motion stutters",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1298481032581398,
      "Minus Multimodal Distance": -12.722182273864746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6755064027383924e-05,
      "MoBERT-F": 0.28420120705247204,
      "MoBERT-N": 0.3689798628799368,
      "MoBERT-min(F/N)": 0.28420120705247204,
      "MoBERT-max(F/N)": 0.3689798628799368,
      "MotionCritic": -6.112138271331787,
      "VeMo (human-opt view)": 0.7304015296367112,
      "VeMo (max entropy view)": 0.7304015296367112,
      "VeMo (min entropy view)": 0.7876182287188306,
      "VeMo (random view)": 0.7304015296367112,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits there, while their motion stutters."
  },
  "011767": {
    "text": "a person is startled from behind.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3800817528216007,
      "Minus Multimodal Distance": -5.091271877288818,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8016889691352844,
      "MoBERT-F": 0.7397908081901032,
      "MoBERT-N": 0.6282119537843791,
      "MoBERT-min(F/N)": 0.6282119537843791,
      "MoBERT-max(F/N)": 0.7397908081901032,
      "MotionCritic": -5.3268656730651855,
      "VeMo (human-opt view)": 0.001247533286456325,
      "VeMo (max entropy view)": 0.11233631977946244,
      "VeMo (min entropy view)": 0.001247533286456325,
      "VeMo (random view)": 0.11233631977946244,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is startled from behind."
  },
  "009359": {
    "text": "a person with both feet on the ground with both knees bended moving from one side to another, trying to hide or something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.281123807854027,
      "Minus Multimodal Distance": -7.963406562805176,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9296901226043701,
      "MoBERT-F": 0.516757321879701,
      "MoBERT-N": 0.40287249607614684,
      "MoBERT-min(F/N)": 0.40287249607614684,
      "MoBERT-max(F/N)": 0.516757321879701,
      "MotionCritic": -4.786461353302002,
      "VeMo (human-opt view)": 0.7424400417101147,
      "VeMo (max entropy view)": 0.7424400417101147,
      "VeMo (min entropy view)": 0.7431906614785992,
      "VeMo (random view)": 0.7431906614785992,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person with both feet on the ground and both knees bent is moving from one side to another, trying to hide or something."
  },
  "000749": {
    "text": "a man using both hands to lift something off ground and places it back on ground in a slightly different position",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.14692547317464363,
      "Minus Multimodal Distance": -9.840315818786621,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.102680031792261e-05,
      "MoBERT-F": 0.2632067479811583,
      "MoBERT-N": 0.3904256893519888,
      "MoBERT-min(F/N)": 0.2632067479811583,
      "MoBERT-max(F/N)": 0.3904256893519888,
      "MotionCritic": -2.582425117492676,
      "VeMo (human-opt view)": 0.008059516429014259,
      "VeMo (max entropy view)": 0.008059516429014259,
      "VeMo (min entropy view)": 0.00023085078070205105,
      "VeMo (random view)": 0.008059516429014259,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man uses both hands to lift something off the ground and places it back on the ground in a slightly different position."
  },
  "010223": {
    "text": "a man turns to his left and brings his hands up to touch in front of his chest as he kicks something with his left leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6194540573272662,
      "Minus Multimodal Distance": -12.991412162780762,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5597372651100159,
      "MoBERT-F": 0.7460243419549214,
      "MoBERT-N": 0.5993210054381258,
      "MoBERT-min(F/N)": 0.5993210054381258,
      "MoBERT-max(F/N)": 0.7460243419549214,
      "MotionCritic": -5.78928279876709,
      "VeMo (human-opt view)": 0.8175824175824176,
      "VeMo (max entropy view)": 0.8175824175824176,
      "VeMo (min entropy view)": 0.8180790960451978,
      "VeMo (random view)": 0.8175824175824176,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man turns to his left and brings his hands up to touch each other in front of his chest as he kicks something with his left leg."
  },
  "007543": {
    "text": "a person slightly bent over with left hand pressing against the air walks forward slowly",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6861031384451792,
      "Minus Multimodal Distance": -8.00086498260498,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5303579604951665e-05,
      "MoBERT-F": 0.3062647568713869,
      "MoBERT-N": 0.42873234922312364,
      "MoBERT-min(F/N)": 0.3062647568713869,
      "MoBERT-max(F/N)": 0.42873234922312364,
      "MotionCritic": -3.5792455673217773,
      "VeMo (human-opt view)": 0.7556029882604055,
      "VeMo (max entropy view)": 0.7556029882604055,
      "VeMo (min entropy view)": 0.8597899938233478,
      "VeMo (random view)": 0.7556029882604055,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person, slightly bent over with the left hand pressing against the air, walks forward slowly."
  },
  "009577": {
    "text": "the sim appears to be adjusting a watch that is placed on their right wrist.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.30428782162229895,
      "Minus Multimodal Distance": -5.947378635406494,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.572708374122158e-05,
      "MoBERT-F": 0.3143369552424308,
      "MoBERT-N": 0.3850414181865109,
      "MoBERT-min(F/N)": 0.3143369552424308,
      "MoBERT-max(F/N)": 0.3850414181865109,
      "MotionCritic": -3.9237420558929443,
      "VeMo (human-opt view)": 0.011665967268149391,
      "VeMo (max entropy view)": 0.01596688350088705,
      "VeMo (min entropy view)": 0.011665967268149391,
      "VeMo (random view)": 0.011665967268149391,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The sim appears to be adjusting a watch that is placed on their right wrist."
  },
  "000344": {
    "text": "a person uses the left arm to demonstrate throwing an object in front of them",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8886944134585245,
      "Minus Multimodal Distance": -5.665963649749756,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.08562903851270676,
      "MoBERT-F": 0.6667381749468773,
      "MoBERT-N": 0.5721378053441192,
      "MoBERT-min(F/N)": 0.5721378053441192,
      "MoBERT-max(F/N)": 0.6667381749468773,
      "MotionCritic": -6.346161842346191,
      "VeMo (human-opt view)": 0.8591885441527446,
      "VeMo (max entropy view)": 0.8591885441527446,
      "VeMo (min entropy view)": 0.8741854636591478,
      "VeMo (random view)": 0.8591885441527446,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person uses their left arm to demonstrate throwing an object in front of them."
  },
  "003566": {
    "text": "person quickly walks in a  clockwise position with shoulders facing back happy",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5325008329141959,
      "Minus Multimodal Distance": -6.817885398864746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9814156293869019,
      "MoBERT-F": 0.6667767131290605,
      "MoBERT-N": 0.6333452006819824,
      "MoBERT-min(F/N)": 0.6333452006819824,
      "MoBERT-max(F/N)": 0.6667767131290605,
      "MotionCritic": -17.037967681884766,
      "VeMo (human-opt view)": 0.3350253807106599,
      "VeMo (max entropy view)": 0.3350253807106599,
      "VeMo (min entropy view)": 0.20210896309314588,
      "VeMo (random view)": 0.3350253807106599,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person quickly walks in a clockwise direction with their shoulders facing backward, looking happy."
  },
  "000708": {
    "text": "she jumps up and down, kicking her heels in the air.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0398837334211017,
      "Minus Multimodal Distance": -8.445135116577148,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.21682044863700867,
      "MoBERT-F": 0.6986464945047104,
      "MoBERT-N": 0.7455761697993345,
      "MoBERT-min(F/N)": 0.6986464945047104,
      "MoBERT-max(F/N)": 0.7455761697993345,
      "MotionCritic": -5.453348636627197,
      "VeMo (human-opt view)": 0.005211666819054586,
      "VeMo (max entropy view)": 0.005211666819054586,
      "VeMo (min entropy view)": 0.00020384183431683772,
      "VeMo (random view)": 0.00020384183431683772,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "She jumps up and down, kicking her heels in the air."
  },
  "010785": {
    "text": "a person marches forward, turns around, and then marches back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.69443896840421,
      "Minus Multimodal Distance": -7.912703037261963,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9911705255508423,
      "MoBERT-F": 0.7495588748457434,
      "MoBERT-N": 0.7144095839589775,
      "MoBERT-min(F/N)": 0.7144095839589775,
      "MoBERT-max(F/N)": 0.7495588748457434,
      "MotionCritic": -11.924325942993164,
      "VeMo (human-opt view)": 0.9362928797924474,
      "VeMo (max entropy view)": 0.8519269776876268,
      "VeMo (min entropy view)": 0.9362928797924474,
      "VeMo (random view)": 0.9362928797924474,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person marches forward, turns around, and then marches back."
  },
  "003379": {
    "text": "the character scratches his head with his right arm",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.21239818348664238,
      "Minus Multimodal Distance": -7.5500054359436035,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.758453360409476e-05,
      "MoBERT-F": 0.2961069265300049,
      "MoBERT-N": 0.3603954302942854,
      "MoBERT-min(F/N)": 0.2961069265300049,
      "MoBERT-max(F/N)": 0.3603954302942854,
      "MotionCritic": -3.631373405456543,
      "VeMo (human-opt view)": 0.8593238822246456,
      "VeMo (max entropy view)": 0.8593238822246456,
      "VeMo (min entropy view)": 0.9840476648087642,
      "VeMo (random view)": 0.9840476648087642,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The character scratches his head with his right arm."
  },
  "009401": {
    "text": "the person is doing a pectoral warm up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5267362134429434,
      "Minus Multimodal Distance": -9.32081413269043,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.375069925619755e-05,
      "MoBERT-F": 0.4509326047179568,
      "MoBERT-N": 0.4799791474945511,
      "MoBERT-min(F/N)": 0.4509326047179568,
      "MoBERT-max(F/N)": 0.4799791474945511,
      "MotionCritic": -3.772963047027588,
      "VeMo (human-opt view)": 0.6369168356997972,
      "VeMo (max entropy view)": 0.6369168356997972,
      "VeMo (min entropy view)": 0.6655405405405406,
      "VeMo (random view)": 0.6655405405405406,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is doing a pectoral warm - up."
  },
  "014420": {
    "text": "someone nervously pacing around in a circle",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5135743769117154,
      "Minus Multimodal Distance": -6.220386505126953,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.9578761061420664e-05,
      "MoBERT-F": 0.45629116709997414,
      "MoBERT-N": 0.6279401448138212,
      "MoBERT-min(F/N)": 0.45629116709997414,
      "MoBERT-max(F/N)": 0.6279401448138212,
      "MotionCritic": -10.270605087280273,
      "VeMo (human-opt view)": 0.6371308016877637,
      "VeMo (max entropy view)": 0.6371308016877637,
      "VeMo (min entropy view)": 0.7060869565217391,
      "VeMo (random view)": 0.6371308016877637,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is nervously pacing around in a circle."
  },
  "006568": {
    "text": "a person wipes down a tabletop.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2713988623742438,
      "Minus Multimodal Distance": -6.814562797546387,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5703364372020587e-05,
      "MoBERT-F": 0.5080383424471561,
      "MoBERT-N": 0.5392123734829352,
      "MoBERT-min(F/N)": 0.5080383424471561,
      "MoBERT-max(F/N)": 0.5392123734829352,
      "MotionCritic": -4.828408718109131,
      "VeMo (human-opt view)": 5.648679410173986e-05,
      "VeMo (max entropy view)": 0.00043167121837574485,
      "VeMo (min entropy view)": 5.648679410173986e-05,
      "VeMo (random view)": 0.00043167121837574485,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person wipes down a tabletop."
  },
  "010967": {
    "text": "a person getting shade from arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.35889933056796947,
      "Minus Multimodal Distance": -4.886412620544434,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.7176632253685966e-05,
      "MoBERT-F": 0.3310486628490086,
      "MoBERT-N": 0.37178400982986504,
      "MoBERT-min(F/N)": 0.3310486628490086,
      "MoBERT-max(F/N)": 0.37178400982986504,
      "MotionCritic": -14.062861442565918,
      "VeMo (human-opt view)": 0.003579748849366441,
      "VeMo (max entropy view)": 0.17307692307692307,
      "VeMo (min entropy view)": 0.003579748849366441,
      "VeMo (random view)": 0.17307692307692307,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is getting shade from their arm."
  },
  "010600": {
    "text": "the man take 4 excited steps forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4529788566175828,
      "Minus Multimodal Distance": -7.845534801483154,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0010644089197739959,
      "MoBERT-F": 0.5827003752038531,
      "MoBERT-N": 0.6455245854735286,
      "MoBERT-min(F/N)": 0.5827003752038531,
      "MoBERT-max(F/N)": 0.6455245854735286,
      "MotionCritic": -2.419497489929199,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.43847874720357943,
      "VeMo (random view)": 0.5,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man takes 4 excited steps forward."
  },
  "004439": {
    "text": "a person walks from the left hand side to the right hand side in a waving path and then stops while facing towards the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9804056860454655,
      "Minus Multimodal Distance": -3.220977783203125,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.710590292233974e-05,
      "MoBERT-F": 0.27057054319560425,
      "MoBERT-N": 0.44442443312541813,
      "MoBERT-min(F/N)": 0.27057054319560425,
      "MoBERT-max(F/N)": 0.44442443312541813,
      "MotionCritic": -1.5675424337387085,
      "VeMo (human-opt view)": 0.8808227465214761,
      "VeMo (max entropy view)": 0.8739837398373984,
      "VeMo (min entropy view)": 0.8808227465214761,
      "VeMo (random view)": 0.8808227465214761,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks from the left - hand side to the right - hand side in a wavy path and then stops while facing right."
  },
  "010996": {
    "text": "a person stands on his left leg, swings his right leg out to the side, back, and then he returns to a two-legged standing position.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.44054763840078937,
      "Minus Multimodal Distance": -4.83885383605957,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9866774082183838,
      "MoBERT-F": 0.700659065880562,
      "MoBERT-N": 0.6271711475987045,
      "MoBERT-min(F/N)": 0.6271711475987045,
      "MoBERT-max(F/N)": 0.700659065880562,
      "MotionCritic": -5.946329593658447,
      "VeMo (human-opt view)": 0.9783743475018642,
      "VeMo (max entropy view)": 0.9770385999751768,
      "VeMo (min entropy view)": 0.9783743475018642,
      "VeMo (random view)": 0.9770385999751768,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands on their left leg, swings their right leg out to the side and then back, before returning to a two - legged standing position."
  },
  "006765": {
    "text": "a standing person seems to be shoved from behind, then regains their balance after a quick stumble.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.38753953998885665,
      "Minus Multimodal Distance": -5.99931526184082,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.606278187362477e-05,
      "MoBERT-F": 0.31026117060892827,
      "MoBERT-N": 0.4494801632858836,
      "MoBERT-min(F/N)": 0.31026117060892827,
      "MoBERT-max(F/N)": 0.4494801632858836,
      "MotionCritic": -7.013433933258057,
      "VeMo (human-opt view)": 0.967032967032967,
      "VeMo (max entropy view)": 0.9364791288566243,
      "VeMo (min entropy view)": 0.967032967032967,
      "VeMo (random view)": 0.967032967032967,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A standing person seems to be shoved from behind. Then, they regain their balance after a quick stumble."
  },
  "011629": {
    "text": "a person bends over and picks an object up with both hands and stands up straight.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4500722144457085,
      "Minus Multimodal Distance": -10.380964279174805,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.010352374985814095,
      "MoBERT-F": 0.45653745384820266,
      "MoBERT-N": 0.44578431875870517,
      "MoBERT-min(F/N)": 0.44578431875870517,
      "MoBERT-max(F/N)": 0.45653745384820266,
      "MotionCritic": -8.112571716308594,
      "VeMo (human-opt view)": 0.7436743674367436,
      "VeMo (max entropy view)": 0.6926070038910506,
      "VeMo (min entropy view)": 0.7436743674367436,
      "VeMo (random view)": 0.7436743674367436,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends over, picks up an object with both hands, and stands up straight."
  },
  "002750": {
    "text": "a man supports himself with his right hand, carefully going down to his knees.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3892965151525663,
      "Minus Multimodal Distance": -7.367116928100586,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00033311263541691005,
      "MoBERT-F": 0.44749351469378634,
      "MoBERT-N": 0.5141955792295896,
      "MoBERT-min(F/N)": 0.44749351469378634,
      "MoBERT-max(F/N)": 0.5141955792295896,
      "MotionCritic": -5.690493106842041,
      "VeMo (human-opt view)": 0.964824120603015,
      "VeMo (max entropy view)": 0.8869565217391304,
      "VeMo (min entropy view)": 0.964824120603015,
      "VeMo (random view)": 0.8869565217391304,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man supports himself with his right hand and carefully goes down to his knees."
  },
  "004158": {
    "text": "a person walks forward using their left hand to steady themselves on an object.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3824438652425147,
      "Minus Multimodal Distance": -10.807609558105469,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.236446718801744e-05,
      "MoBERT-F": 0.3199540584296552,
      "MoBERT-N": 0.4260027237211941,
      "MoBERT-min(F/N)": 0.3199540584296552,
      "MoBERT-max(F/N)": 0.4260027237211941,
      "MotionCritic": -9.24566650390625,
      "VeMo (human-opt view)": 0.3349397590361446,
      "VeMo (max entropy view)": 0.40672782874617736,
      "VeMo (min entropy view)": 0.3349397590361446,
      "VeMo (random view)": 0.40672782874617736,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, using their left hand to steady themselves on an object."
  },
  "004183": {
    "text": "a person briskly walks foward swinging their left arm back and forth.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.446647347687389,
      "Minus Multimodal Distance": -5.563496112823486,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9123885631561279,
      "MoBERT-F": 0.6743943640637162,
      "MoBERT-N": 0.5939772012818868,
      "MoBERT-min(F/N)": 0.5939772012818868,
      "MoBERT-max(F/N)": 0.6743943640637162,
      "MotionCritic": -5.520051002502441,
      "VeMo (human-opt view)": 0.5778443113772455,
      "VeMo (max entropy view)": 0.5778443113772455,
      "VeMo (min entropy view)": 0.6363636363636364,
      "VeMo (random view)": 0.5778443113772455,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person briskly walks forward, swinging their left arm back and forth."
  },
  "002397": {
    "text": "a person paces from left to right and vice versa.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7420344993387551,
      "Minus Multimodal Distance": -6.75502872467041,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00010302624286850914,
      "MoBERT-F": 0.47846195455175466,
      "MoBERT-N": 0.5677550393919235,
      "MoBERT-min(F/N)": 0.47846195455175466,
      "MoBERT-max(F/N)": 0.5677550393919235,
      "MotionCritic": -9.882418632507324,
      "VeMo (human-opt view)": 0.9625425652667423,
      "VeMo (max entropy view)": 0.9604249667994688,
      "VeMo (min entropy view)": 0.9625425652667423,
      "VeMo (random view)": 0.9625425652667423,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person paces from left to right and vice versa."
  },
  "004366": {
    "text": "the person was laying down on their back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6483110118232112,
      "Minus Multimodal Distance": -1.9519950151443481,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.987184464931488,
      "MoBERT-F": 0.6390440375869233,
      "MoBERT-N": 0.5164248816858593,
      "MoBERT-min(F/N)": 0.5164248816858593,
      "MoBERT-max(F/N)": 0.6390440375869233,
      "MotionCritic": -9.559307098388672,
      "VeMo (human-opt view)": 0.8931297709923665,
      "VeMo (max entropy view)": 0.8931297709923665,
      "VeMo (min entropy view)": 0.9647528264242962,
      "VeMo (random view)": 0.9647528264242962,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was lying down on their back."
  },
  "012515": {
    "text": "a person crosses their arms then drops them.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.19062285886136457,
      "Minus Multimodal Distance": -11.344097137451172,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.2471769120311365e-05,
      "MoBERT-F": 0.2869558581607178,
      "MoBERT-N": 0.4537531990514843,
      "MoBERT-min(F/N)": 0.2869558581607178,
      "MoBERT-max(F/N)": 0.4537531990514843,
      "MotionCritic": -1.814252495765686,
      "VeMo (human-opt view)": 0.9995949945480035,
      "VeMo (max entropy view)": 0.9604249667994688,
      "VeMo (min entropy view)": 0.9995949945480035,
      "VeMo (random view)": 0.9604249667994688,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person crosses their arms and then drops them."
  },
  "011295": {
    "text": "a person is walking forward strangely",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6594390272122997,
      "Minus Multimodal Distance": -9.756916046142578,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.351699292077683e-05,
      "MoBERT-F": 0.4764464110699424,
      "MoBERT-N": 0.5564538472617947,
      "MoBERT-min(F/N)": 0.4764464110699424,
      "MoBERT-max(F/N)": 0.5564538472617947,
      "MotionCritic": -4.09262228012085,
      "VeMo (human-opt view)": 0.8354263053536021,
      "VeMo (max entropy view)": 0.8080808080808081,
      "VeMo (min entropy view)": 0.8354263053536021,
      "VeMo (random view)": 0.8080808080808081,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking forward strangely."
  },
  "002530": {
    "text": "a person walks towards the camera.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.36763625690342555,
      "Minus Multimodal Distance": -4.74152946472168,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.000647340202704072,
      "MoBERT-F": 0.4682462662514624,
      "MoBERT-N": 0.5552716689397832,
      "MoBERT-min(F/N)": 0.4682462662514624,
      "MoBERT-max(F/N)": 0.5552716689397832,
      "MotionCritic": -6.13479471206665,
      "VeMo (human-opt view)": 0.7548291233283804,
      "VeMo (max entropy view)": 0.7548291233283804,
      "VeMo (min entropy view)": 0.13286713286713286,
      "VeMo (random view)": 0.13286713286713286,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks towards the camera."
  },
  "003729": {
    "text": "a person in boxing class",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7722977225632945,
      "Minus Multimodal Distance": -7.057582378387451,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.331439645786304e-05,
      "MoBERT-F": 0.3289118430836324,
      "MoBERT-N": 0.4045785010315613,
      "MoBERT-min(F/N)": 0.3289118430836324,
      "MoBERT-max(F/N)": 0.4045785010315613,
      "MotionCritic": -7.04242467880249,
      "VeMo (human-opt view)": 0.5472154963680388,
      "VeMo (max entropy view)": 0.5472154963680388,
      "VeMo (min entropy view)": 0.5928338762214984,
      "VeMo (random view)": 0.5472154963680388,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person in a boxing class"
  },
  "013382": {
    "text": "he turns left then stops",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4063100717883131,
      "Minus Multimodal Distance": -8.055404663085938,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.60857518191915e-05,
      "MoBERT-F": 0.47708857897590595,
      "MoBERT-N": 0.6017306844515747,
      "MoBERT-min(F/N)": 0.47708857897590595,
      "MoBERT-max(F/N)": 0.6017306844515747,
      "MotionCritic": -9.874807357788086,
      "VeMo (human-opt view)": 0.964964964964965,
      "VeMo (max entropy view)": 0.964964964964965,
      "VeMo (min entropy view)": 0.9723243661699246,
      "VeMo (random view)": 0.964964964964965,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He turns left then stops"
  },
  "013389": {
    "text": "person is performing golf motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.750731302083639,
      "Minus Multimodal Distance": -5.513375759124756,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.345534787513316e-05,
      "MoBERT-F": 0.3624793524762312,
      "MoBERT-N": 0.4299697590114471,
      "MoBERT-min(F/N)": 0.3624793524762312,
      "MoBERT-max(F/N)": 0.4299697590114471,
      "MotionCritic": -8.92921257019043,
      "VeMo (human-opt view)": 0.8808808808808809,
      "VeMo (max entropy view)": 0.8176100628930818,
      "VeMo (min entropy view)": 0.8808808808808809,
      "VeMo (random view)": 0.8176100628930818,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is performing a golf motion"
  },
  "004394": {
    "text": "a person doing a meditation.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.3376787570271893,
      "Minus Multimodal Distance": -7.536638259887695,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2419673769036308e-05,
      "MoBERT-F": 0.3402442719886356,
      "MoBERT-N": 0.47425984753553674,
      "MoBERT-min(F/N)": 0.3402442719886356,
      "MoBERT-max(F/N)": 0.47425984753553674,
      "MotionCritic": -6.5660223960876465,
      "VeMo (human-opt view)": 0.0010309849786597194,
      "VeMo (max entropy view)": 0.011706102117061022,
      "VeMo (min entropy view)": 0.0010309849786597194,
      "VeMo (random view)": 0.011706102117061022,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is doing meditation."
  },
  "002976": {
    "text": "a person walks forward and turns clockwise, then grabs their knee in pain before continuing ahead and turning clockwise again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8253585324970365,
      "Minus Multimodal Distance": -8.221805572509766,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.07996050268411636,
      "MoBERT-F": 0.49694685417955886,
      "MoBERT-N": 0.5434731688719511,
      "MoBERT-min(F/N)": 0.49694685417955886,
      "MoBERT-max(F/N)": 0.5434731688719511,
      "MotionCritic": -14.787149429321289,
      "VeMo (human-opt view)": 0.7553865652724968,
      "VeMo (max entropy view)": 0.7553865652724968,
      "VeMo (min entropy view)": 0.8086642599277978,
      "VeMo (random view)": 0.7553865652724968,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and turns clockwise. Then, they grab their knee in pain before continuing ahead and turning clockwise again."
  },
  "006214": {
    "text": "a person stands up from a sitting position, makes an overhand throw motion, and underhand throw motion, then a basketball shot motion.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9675968748170339,
      "Minus Multimodal Distance": -12.007399559020996,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6426478143548593e-05,
      "MoBERT-F": 0.3597268274495733,
      "MoBERT-N": 0.4563796459610808,
      "MoBERT-min(F/N)": 0.3597268274495733,
      "MoBERT-max(F/N)": 0.4563796459610808,
      "MotionCritic": -6.804850101470947,
      "VeMo (human-opt view)": 0.8875286916602907,
      "VeMo (max entropy view)": 0.8875286916602907,
      "VeMo (min entropy view)": 0.9150991357397051,
      "VeMo (random view)": 0.9150991357397051,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands up from a sitting position, makes an overhand throwing motion, an underhand throwing motion, and then a basketball - shooting motion."
  },
  "007121": {
    "text": "a person walks up four steps with their hands by their sides and their lean forward slightly as they go up the stairs and once they've stopped going up the stairs, they straighten up again",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3336472850581067,
      "Minus Multimodal Distance": -9.237915992736816,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.842248068191111e-05,
      "MoBERT-F": 0.4456370566740293,
      "MoBERT-N": 0.47872314331208216,
      "MoBERT-min(F/N)": 0.4456370566740293,
      "MoBERT-max(F/N)": 0.47872314331208216,
      "MotionCritic": -5.10654354095459,
      "VeMo (human-opt view)": 0.03953823953823954,
      "VeMo (max entropy view)": 0.03953823953823954,
      "VeMo (min entropy view)": 0.037269869779973056,
      "VeMo (random view)": 0.03953823953823954,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks up four steps with their hands by their sides. They lean forward slightly as they go up the stairs. Once they stop going up the stairs, they straighten up again."
  },
  "008458": {
    "text": "a person walks over something by standing on it, before continuing on their way.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.42967284572084924,
      "Minus Multimodal Distance": -12.486433029174805,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.784829823416658e-05,
      "MoBERT-F": 0.31182379794588494,
      "MoBERT-N": 0.4254890134673645,
      "MoBERT-min(F/N)": 0.31182379794588494,
      "MoBERT-max(F/N)": 0.4254890134673645,
      "MotionCritic": -0.1482161581516266,
      "VeMo (human-opt view)": 0.7658862876254181,
      "VeMo (max entropy view)": 0.7658862876254181,
      "VeMo (min entropy view)": 0.788159111933395,
      "VeMo (random view)": 0.788159111933395,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person steps over something by standing on it before continuing on their way."
  },
  "013174": {
    "text": "a person walks forward and picks up and moves a heavy object.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6407806944824872,
      "Minus Multimodal Distance": -13.3749418258667,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.030289659276604652,
      "MoBERT-F": 0.4539671767877591,
      "MoBERT-N": 0.5877413928993996,
      "MoBERT-min(F/N)": 0.4539671767877591,
      "MoBERT-max(F/N)": 0.5877413928993996,
      "MotionCritic": -5.568655490875244,
      "VeMo (human-opt view)": 0.003387431113807574,
      "VeMo (max entropy view)": 0.003387431113807574,
      "VeMo (min entropy view)": 0.0009106283335501497,
      "VeMo (random view)": 0.003387431113807574,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, picks up a heavy object, and moves it."
  },
  "011284": {
    "text": "a person picks something up with their right hand and walks forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.46204166056511675,
      "Minus Multimodal Distance": -2.954031229019165,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.0839055398246273e-05,
      "MoBERT-F": 0.3627288204574529,
      "MoBERT-N": 0.36348294132988235,
      "MoBERT-min(F/N)": 0.3627288204574529,
      "MoBERT-max(F/N)": 0.36348294132988235,
      "MotionCritic": -9.959564208984375,
      "VeMo (human-opt view)": 0.6657534246575343,
      "VeMo (max entropy view)": 0.6657534246575343,
      "VeMo (min entropy view)": 0.06746031746031746,
      "VeMo (random view)": 0.6657534246575343,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks something up with their right hand and walks forward."
  },
  "010557": {
    "text": "person puts hands on head then chest then knees then toes",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4537972411383724,
      "Minus Multimodal Distance": -4.982041358947754,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.773301068576984e-05,
      "MoBERT-F": 0.46806831087804496,
      "MoBERT-N": 0.5907468559226503,
      "MoBERT-min(F/N)": 0.46806831087804496,
      "MoBERT-max(F/N)": 0.5907468559226503,
      "MotionCritic": -7.1083664894104,
      "VeMo (human-opt view)": 0.8670181605155243,
      "VeMo (max entropy view)": 0.6650485436893204,
      "VeMo (min entropy view)": 0.8670181605155243,
      "VeMo (random view)": 0.6650485436893204,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person puts their hands on their head, then on their chest, then on their knees, and then on their toes."
  },
  "001168": {
    "text": "a person walks forward casually with a swagger to their hips.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2599523821962504,
      "Minus Multimodal Distance": -2.7364449501037598,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9116408768459223e-05,
      "MoBERT-F": 0.3616278328087303,
      "MoBERT-N": 0.49002278829458795,
      "MoBERT-min(F/N)": 0.3616278328087303,
      "MoBERT-max(F/N)": 0.49002278829458795,
      "MotionCritic": -4.468509674072266,
      "VeMo (human-opt view)": 0.9098712446351931,
      "VeMo (max entropy view)": 0.8350877192982457,
      "VeMo (min entropy view)": 0.9098712446351931,
      "VeMo (random view)": 0.8350877192982457,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward casually, swaying their hips with a swagger."
  },
  "012118": {
    "text": "this person side steps left, stops and side steps right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.34282417570158985,
      "Minus Multimodal Distance": -4.308772563934326,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.21123695373535156,
      "MoBERT-F": 0.5739315610915792,
      "MoBERT-N": 0.6003467886980826,
      "MoBERT-min(F/N)": 0.5739315610915792,
      "MoBERT-max(F/N)": 0.6003467886980826,
      "MotionCritic": -3.171837091445923,
      "VeMo (human-opt view)": 0.9808429118773946,
      "VeMo (max entropy view)": 0.9626880250048838,
      "VeMo (min entropy view)": 0.9808429118773946,
      "VeMo (random view)": 0.9626880250048838,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person side - steps to the left, stops, and then side - steps to the right."
  },
  "011555": {
    "text": "the person is doing a casual quick walk.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9674883390511195,
      "Minus Multimodal Distance": -10.874406814575195,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00019352736126165837,
      "MoBERT-F": 0.5205879670144099,
      "MoBERT-N": 0.5428196680597117,
      "MoBERT-min(F/N)": 0.5205879670144099,
      "MoBERT-max(F/N)": 0.5428196680597117,
      "MotionCritic": -3.4141898155212402,
      "VeMo (human-opt view)": 0.9399744572158365,
      "VeMo (max entropy view)": 0.8807692307692307,
      "VeMo (min entropy view)": 0.9399744572158365,
      "VeMo (random view)": 0.8807692307692307,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is doing a casual quick walk."
  },
  "005419": {
    "text": "a person shields themselves with their left arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7338503836920695,
      "Minus Multimodal Distance": -2.4101364612579346,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.1459628593875095e-05,
      "MoBERT-F": 0.3440657322461756,
      "MoBERT-N": 0.41981522014343337,
      "MoBERT-min(F/N)": 0.3440657322461756,
      "MoBERT-max(F/N)": 0.41981522014343337,
      "MotionCritic": -5.4630560874938965,
      "VeMo (human-opt view)": 0.22288438617401668,
      "VeMo (max entropy view)": 0.36193029490616624,
      "VeMo (min entropy view)": 0.22288438617401668,
      "VeMo (random view)": 0.36193029490616624,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person shields themselves with their left arm."
  },
  "000439": {
    "text": "person is doing \"i'm the little teapot\" dance",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3989486927291144,
      "Minus Multimodal Distance": -6.015406131744385,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.05857256427407265,
      "MoBERT-F": 0.6260013591052205,
      "MoBERT-N": 0.6604961318026247,
      "MoBERT-min(F/N)": 0.6260013591052205,
      "MoBERT-max(F/N)": 0.6604961318026247,
      "MotionCritic": 0.6215506196022034,
      "VeMo (human-opt view)": 0.03312444046553268,
      "VeMo (max entropy view)": 0.03312444046553268,
      "VeMo (min entropy view)": 0.026067967790364407,
      "VeMo (random view)": 0.026067967790364407,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is doing the \"I'm a Little Teapot\" dance."
  },
  "013757": {
    "text": "a person runs, hesitates, then runs again.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5560774220348718,
      "Minus Multimodal Distance": -5.1055989265441895,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.48424232006073,
      "MoBERT-F": 0.6870836702736215,
      "MoBERT-N": 0.6928564792247317,
      "MoBERT-min(F/N)": 0.6870836702736215,
      "MoBERT-max(F/N)": 0.6928564792247317,
      "MotionCritic": -5.676130294799805,
      "VeMo (human-opt view)": 0.9797054389423634,
      "VeMo (max entropy view)": 0.964778810932657,
      "VeMo (min entropy view)": 0.9797054389423634,
      "VeMo (random view)": 0.9797054389423634,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs, hesitates, then runs again."
  },
  "008971": {
    "text": "a person walks around in a circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2049959790022116,
      "Minus Multimodal Distance": -6.369812965393066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.1325044333934784,
      "MoBERT-F": 0.649256128494171,
      "MoBERT-N": 0.6836380012124036,
      "MoBERT-min(F/N)": 0.649256128494171,
      "MoBERT-max(F/N)": 0.6836380012124036,
      "MotionCritic": -11.98621940612793,
      "VeMo (human-opt view)": 0.7660910518053375,
      "VeMo (max entropy view)": 0.7660910518053375,
      "VeMo (min entropy view)": 0.7987711213517665,
      "VeMo (random view)": 0.7660910518053375,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks around in a circle."
  },
  "012385": {
    "text": "walking forward and then back.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4437839245823548,
      "Minus Multimodal Distance": -8.996006965637207,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.186839164001867e-05,
      "MoBERT-F": 0.523716463438099,
      "MoBERT-N": 0.6396067296194442,
      "MoBERT-min(F/N)": 0.523716463438099,
      "MoBERT-max(F/N)": 0.6396067296194442,
      "MotionCritic": -11.454110145568848,
      "VeMo (human-opt view)": 0.9050772626931567,
      "VeMo (max entropy view)": 0.8173076923076923,
      "VeMo (min entropy view)": 0.9050772626931567,
      "VeMo (random view)": 0.9050772626931567,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and then back."
  },
  "014347": {
    "text": "a person is sitting and puts their left hand to their head, and their right foot up on their left knee.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5528269634923514,
      "Minus Multimodal Distance": -10.515485763549805,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.025624871253967285,
      "MoBERT-F": 0.43755064824967205,
      "MoBERT-N": 0.48899648655360817,
      "MoBERT-min(F/N)": 0.43755064824967205,
      "MoBERT-max(F/N)": 0.48899648655360817,
      "MotionCritic": -5.269084930419922,
      "VeMo (human-opt view)": 0.7422303473491774,
      "VeMo (max entropy view)": 0.7422303473491774,
      "VeMo (min entropy view)": 0.7665647298674821,
      "VeMo (random view)": 0.7665647298674821,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting, putting their left hand to their head and their right foot up on their left knee."
  },
  "000665": {
    "text": "a person walks forward, with a quickening step and stops tentatively, as they take their last step.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4582303240820589,
      "Minus Multimodal Distance": -10.782746315002441,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.346022251411341e-05,
      "MoBERT-F": 0.4621060723878796,
      "MoBERT-N": 0.5513398523881827,
      "MoBERT-min(F/N)": 0.4621060723878796,
      "MoBERT-max(F/N)": 0.5513398523881827,
      "MotionCritic": -2.417360544204712,
      "VeMo (human-opt view)": 0.9578713968957872,
      "VeMo (max entropy view)": 0.9362928797924474,
      "VeMo (min entropy view)": 0.9578713968957872,
      "VeMo (random view)": 0.9362928797924474,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward with a quickening step and stops tentatively as they take their last step."
  },
  "002009": {
    "text": "a person is using his left arm moving it up and down leaving his elbow still.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5312893165629554,
      "Minus Multimodal Distance": -9.192315101623535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.3127551432698965e-05,
      "MoBERT-F": 0.31328729074894895,
      "MoBERT-N": 0.40360968414961657,
      "MoBERT-min(F/N)": 0.31328729074894895,
      "MoBERT-max(F/N)": 0.40360968414961657,
      "MotionCritic": -9.19411849975586,
      "VeMo (human-opt view)": 0.6927835051546392,
      "VeMo (max entropy view)": 0.6927835051546392,
      "VeMo (min entropy view)": 0.8350045578851413,
      "VeMo (random view)": 0.6927835051546392,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is using his left arm, moving it up and down while keeping his elbow still."
  },
  "005891": {
    "text": "the man picks up a brushes nd brushes someone's hair then puts the brush back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.522999130490738,
      "Minus Multimodal Distance": -6.000187397003174,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8698001187876798e-05,
      "MoBERT-F": 0.317710892829449,
      "MoBERT-N": 0.4082845610803993,
      "MoBERT-min(F/N)": 0.317710892829449,
      "MoBERT-max(F/N)": 0.4082845610803993,
      "MotionCritic": -9.651300430297852,
      "VeMo (human-opt view)": 7.191590666666083e-06,
      "VeMo (max entropy view)": 0.00011976356723520934,
      "VeMo (min entropy view)": 7.191590666666083e-06,
      "VeMo (random view)": 7.191590666666083e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man picks up a brush and brushes someone's hair, then puts the brush back down."
  },
  "005751": {
    "text": "a person balances on their right foot while kicking in multiple directions with their left foot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6720707098041789,
      "Minus Multimodal Distance": -9.131706237792969,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.989471971988678,
      "MoBERT-F": 0.7179237553040978,
      "MoBERT-N": 0.6682931055866497,
      "MoBERT-min(F/N)": 0.6682931055866497,
      "MoBERT-max(F/N)": 0.7179237553040978,
      "MotionCritic": -4.654869556427002,
      "VeMo (human-opt view)": 0.9739934502022731,
      "VeMo (max entropy view)": 0.9739934502022731,
      "VeMo (min entropy view)": 0.9755615897309307,
      "VeMo (random view)": 0.9739934502022731,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person balances on their right foot while kicking in multiple directions with their left foot."
  },
  "005229": {
    "text": "a person squats to lift something up then struggles to carry and put it down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6645165959437579,
      "Minus Multimodal Distance": -2.5444538593292236,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.511969614715781e-05,
      "MoBERT-F": 0.33517991992656726,
      "MoBERT-N": 0.4196304036805612,
      "MoBERT-min(F/N)": 0.33517991992656726,
      "MoBERT-max(F/N)": 0.4196304036805612,
      "MotionCritic": -7.913752555847168,
      "VeMo (human-opt view)": 0.7983539094650206,
      "VeMo (max entropy view)": 0.7544783983140148,
      "VeMo (min entropy view)": 0.7983539094650206,
      "VeMo (random view)": 0.7983539094650206,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person squats to lift something up, then struggles to carry it and put it down."
  },
  "008147": {
    "text": "this person swings both arms around and back to front.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6839206614250984,
      "Minus Multimodal Distance": -5.257464408874512,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.2559010088443756,
      "MoBERT-F": 0.7867950036134669,
      "MoBERT-N": 0.6452043220093916,
      "MoBERT-min(F/N)": 0.6452043220093916,
      "MoBERT-max(F/N)": 0.7867950036134669,
      "MotionCritic": -11.724893569946289,
      "VeMo (human-opt view)": 0.9796699354221479,
      "VeMo (max entropy view)": 0.9628061687329906,
      "VeMo (min entropy view)": 0.9796699354221479,
      "VeMo (random view)": 0.9796699354221479,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person swings both arms around, from back to front."
  },
  "000374": {
    "text": "a person walks forward one foot in front of another",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5220337822057884,
      "Minus Multimodal Distance": -11.292792320251465,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.493468127795495e-05,
      "MoBERT-F": 0.3959552921462038,
      "MoBERT-N": 0.5123695553979899,
      "MoBERT-min(F/N)": 0.3959552921462038,
      "MoBERT-max(F/N)": 0.5123695553979899,
      "MotionCritic": -7.911764144897461,
      "VeMo (human-opt view)": 0.9688249400479616,
      "VeMo (max entropy view)": 0.952638700947226,
      "VeMo (min entropy view)": 0.9688249400479616,
      "VeMo (random view)": 0.952638700947226,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward with one foot in front of the other."
  },
  "007617": {
    "text": "the person standing still with his arms crossed.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.22062806899359283,
      "Minus Multimodal Distance": -9.764305114746094,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.353056647232734e-05,
      "MoBERT-F": 0.3179450339335174,
      "MoBERT-N": 0.44022453005202133,
      "MoBERT-min(F/N)": 0.3179450339335174,
      "MoBERT-max(F/N)": 0.44022453005202133,
      "MotionCritic": -6.971134662628174,
      "VeMo (human-opt view)": 0.9840462427745664,
      "VeMo (max entropy view)": 0.03964165733482643,
      "VeMo (min entropy view)": 0.9840462427745664,
      "VeMo (random view)": 0.03964165733482643,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing still with his arms crossed."
  },
  "002749": {
    "text": "person is waving with their right hand",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.23543640925417061,
      "Minus Multimodal Distance": -11.852046966552734,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.932551382808015e-05,
      "MoBERT-F": 0.3267549693905373,
      "MoBERT-N": 0.35591327994255034,
      "MoBERT-min(F/N)": 0.3267549693905373,
      "MoBERT-max(F/N)": 0.35591327994255034,
      "MotionCritic": -8.537418365478516,
      "VeMo (human-opt view)": 0.9740847387906212,
      "VeMo (max entropy view)": 0.9648887697668186,
      "VeMo (min entropy view)": 0.9740847387906212,
      "VeMo (random view)": 0.9648887697668186,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is waving with their right hand."
  },
  "001675": {
    "text": "person went around to sit on chair.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7964277285878337,
      "Minus Multimodal Distance": -8.261527061462402,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00015188116231001914,
      "MoBERT-F": 0.4283001162938652,
      "MoBERT-N": 0.5513744069369197,
      "MoBERT-min(F/N)": 0.4283001162938652,
      "MoBERT-max(F/N)": 0.5513744069369197,
      "MotionCritic": -7.574608325958252,
      "VeMo (human-opt view)": 0.02759526938239159,
      "VeMo (max entropy view)": 0.08023483365949119,
      "VeMo (min entropy view)": 0.02759526938239159,
      "VeMo (random view)": 0.08023483365949119,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person went around and sat on the chair."
  },
  "003910": {
    "text": "a man walks forward then right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6397674392844276,
      "Minus Multimodal Distance": -6.586055278778076,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4418832254013978e-05,
      "MoBERT-F": 0.4258501779416117,
      "MoBERT-N": 0.588235134592196,
      "MoBERT-min(F/N)": 0.4258501779416117,
      "MoBERT-max(F/N)": 0.588235134592196,
      "MotionCritic": -2.9718806743621826,
      "VeMo (human-opt view)": 0.9687620516775935,
      "VeMo (max entropy view)": 0.9687620516775935,
      "VeMo (min entropy view)": 0.982048417132216,
      "VeMo (random view)": 0.9687620516775935,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward and then turns right."
  },
  "001534": {
    "text": "a person is stretching its shoulders",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6593994445906983,
      "Minus Multimodal Distance": -6.325351238250732,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5303143047494814e-05,
      "MoBERT-F": 0.48143677578631194,
      "MoBERT-N": 0.49225236891309715,
      "MoBERT-min(F/N)": 0.48143677578631194,
      "MoBERT-max(F/N)": 0.49225236891309715,
      "MotionCritic": -6.712846279144287,
      "VeMo (human-opt view)": 0.9689488910318226,
      "VeMo (max entropy view)": 0.9689488910318226,
      "VeMo (min entropy view)": 0.9769274057400112,
      "VeMo (random view)": 0.9769274057400112,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is stretching their shoulders."
  },
  "001759": {
    "text": "he tries to clean the floor",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1112986814776247,
      "Minus Multimodal Distance": -8.36005973815918,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.12303195148706436,
      "MoBERT-F": 0.6064626382015339,
      "MoBERT-N": 0.5751179629795488,
      "MoBERT-min(F/N)": 0.5751179629795488,
      "MoBERT-max(F/N)": 0.6064626382015339,
      "MotionCritic": -3.9536428451538086,
      "VeMo (human-opt view)": 0.7317073170731707,
      "VeMo (max entropy view)": 0.68,
      "VeMo (min entropy view)": 0.7317073170731707,
      "VeMo (random view)": 0.7317073170731707,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He tries to clean the floor."
  },
  "009762": {
    "text": "a person throws a shotput.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9553445740711006,
      "Minus Multimodal Distance": -3.659909725189209,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.1925482451915741,
      "MoBERT-F": 0.669750754195423,
      "MoBERT-N": 0.6207869897962893,
      "MoBERT-min(F/N)": 0.6207869897962893,
      "MoBERT-max(F/N)": 0.669750754195423,
      "MotionCritic": -4.129116535186768,
      "VeMo (human-opt view)": 0.5150501672240803,
      "VeMo (max entropy view)": 0.5150501672240803,
      "VeMo (min entropy view)": 0.7058823529411765,
      "VeMo (random view)": 0.7058823529411765,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws a shot - put."
  },
  "002627": {
    "text": "a person dancing, starting in a t pose, the. turns, continues to dance and finishes with another turn",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1790380054560836,
      "Minus Multimodal Distance": -7.627544403076172,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9955651164054871,
      "MoBERT-F": 0.690713795841329,
      "MoBERT-N": 0.6893471272177596,
      "MoBERT-min(F/N)": 0.6893471272177596,
      "MoBERT-max(F/N)": 0.690713795841329,
      "MotionCritic": -12.943746566772461,
      "VeMo (human-opt view)": 0.943467676494455,
      "VeMo (max entropy view)": 0.9147788565264293,
      "VeMo (min entropy view)": 0.943467676494455,
      "VeMo (random view)": 0.9147788565264293,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is dancing. They start in a T - pose, then turn, continue to dance, and finish with another turn."
  },
  "006662": {
    "text": "a man is shot in the chest, falls to the ground, crawls and pulls himself up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4426289669827905,
      "Minus Multimodal Distance": -5.107088565826416,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0166697446256876,
      "MoBERT-F": 0.5843759444076837,
      "MoBERT-N": 0.5387679120741817,
      "MoBERT-min(F/N)": 0.5387679120741817,
      "MoBERT-max(F/N)": 0.5843759444076837,
      "MotionCritic": -4.471206188201904,
      "VeMo (human-opt view)": 0.8932038834951457,
      "VeMo (max entropy view)": 0.8433734939759037,
      "VeMo (min entropy view)": 0.8932038834951457,
      "VeMo (random view)": 0.8433734939759037,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is shot in the chest, falls to the ground, crawls, and pulls himself up."
  },
  "014108": {
    "text": "a person steps back and sits down, then stands back up again and walks forward",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.505899855417224,
      "Minus Multimodal Distance": -5.90577507019043,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9934086799621582,
      "MoBERT-F": 0.6446201516540072,
      "MoBERT-N": 0.5741448545345396,
      "MoBERT-min(F/N)": 0.5741448545345396,
      "MoBERT-max(F/N)": 0.6446201516540072,
      "MotionCritic": -10.523216247558594,
      "VeMo (human-opt view)": 0.9928709142267856,
      "VeMo (max entropy view)": 0.98414517669532,
      "VeMo (min entropy view)": 0.9928709142267856,
      "VeMo (random view)": 0.9928709142267856,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person steps back and sits down. Then, they stand back up again and walk forward."
  },
  "008378": {
    "text": "a man climbs up and seems to be putting something away with both hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.685838542088573,
      "Minus Multimodal Distance": -3.535290002822876,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.01573711633682251,
      "MoBERT-F": 0.7088030141892808,
      "MoBERT-N": 0.5711177416770029,
      "MoBERT-min(F/N)": 0.5711177416770029,
      "MoBERT-max(F/N)": 0.7088030141892808,
      "MotionCritic": -4.099842548370361,
      "VeMo (human-opt view)": 0.009129250974320645,
      "VeMo (max entropy view)": 0.01795862696067288,
      "VeMo (min entropy view)": 0.009129250974320645,
      "VeMo (random view)": 0.009129250974320645,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man climbs up and seems to be putting something away with both hands."
  },
  "014232": {
    "text": "a person waalks forward and appears to pour two items into a bowl.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.388974450266646,
      "Minus Multimodal Distance": -4.767528533935547,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0008475342765450478,
      "MoBERT-F": 0.524062563506344,
      "MoBERT-N": 0.5575356565384166,
      "MoBERT-min(F/N)": 0.524062563506344,
      "MoBERT-max(F/N)": 0.5575356565384166,
      "MotionCritic": -4.623727798461914,
      "VeMo (human-opt view)": 2.0897470046130335e-05,
      "VeMo (max entropy view)": 3.2256574954041074e-05,
      "VeMo (min entropy view)": 2.0897470046130335e-05,
      "VeMo (random view)": 3.2256574954041074e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and appears to pour two items into a bowl."
  },
  "004779": {
    "text": "a person limping with right leg hurt and going around in a circle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9568989464654952,
      "Minus Multimodal Distance": -7.284951686859131,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9834888577461243,
      "MoBERT-F": 0.6714804851838194,
      "MoBERT-N": 0.718417252275699,
      "MoBERT-min(F/N)": 0.6714804851838194,
      "MoBERT-max(F/N)": 0.718417252275699,
      "MotionCritic": -17.916526794433594,
      "VeMo (human-opt view)": 0.9045592705167174,
      "VeMo (max entropy view)": 0.8352668213457076,
      "VeMo (min entropy view)": 0.9045592705167174,
      "VeMo (random view)": 0.8352668213457076,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is limping with their right leg hurt and walking around in a circle."
  },
  "014384": {
    "text": "a person balances on one foot while moving their other, and then switches.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.43405209678948153,
      "Minus Multimodal Distance": -13.47542953491211,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9268203973770142,
      "MoBERT-F": 0.7451933192355236,
      "MoBERT-N": 0.6551514927163745,
      "MoBERT-min(F/N)": 0.6551514927163745,
      "MoBERT-max(F/N)": 0.7451933192355236,
      "MotionCritic": -8.81717586517334,
      "VeMo (human-opt view)": 0.978296382730455,
      "VeMo (max entropy view)": 0.975609756097561,
      "VeMo (min entropy view)": 0.978296382730455,
      "VeMo (random view)": 0.978296382730455,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person balances on one foot while moving the other, and then switches."
  },
  "012361": {
    "text": "a person picks up  a heavy object and moves it right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4610217169171275,
      "Minus Multimodal Distance": -8.39856243133545,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.385021409485489e-05,
      "MoBERT-F": 0.43684906717251326,
      "MoBERT-N": 0.5139218871265168,
      "MoBERT-min(F/N)": 0.43684906717251326,
      "MoBERT-max(F/N)": 0.5139218871265168,
      "MotionCritic": -4.572755336761475,
      "VeMo (human-opt view)": 0.0012459255716784758,
      "VeMo (max entropy view)": 0.0012459255716784758,
      "VeMo (min entropy view)": 0.0002867094721458072,
      "VeMo (random view)": 0.0002867094721458072,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks up a heavy object and moves it to the right."
  },
  "011558": {
    "text": "the person was moving his arms up and down doing something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.23665098763682837,
      "Minus Multimodal Distance": -9.317572593688965,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4885790480766445e-05,
      "MoBERT-F": 0.4153864535663741,
      "MoBERT-N": 0.46615557233035326,
      "MoBERT-min(F/N)": 0.4153864535663741,
      "MoBERT-max(F/N)": 0.46615557233035326,
      "MotionCritic": -6.418580532073975,
      "VeMo (human-opt view)": 0.8263888888888888,
      "VeMo (max entropy view)": 0.8263888888888888,
      "VeMo (min entropy view)": 0.8809648662821186,
      "VeMo (random view)": 0.8263888888888888,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was moving his arms up and down, doing something."
  },
  "004863": {
    "text": "a person walks and then puts both arms out while bowing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8419919876770623,
      "Minus Multimodal Distance": -6.6530232429504395,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 8.921422704588622e-05,
      "MoBERT-F": 0.4487664449363241,
      "MoBERT-N": 0.5087842869586169,
      "MoBERT-min(F/N)": 0.4487664449363241,
      "MoBERT-max(F/N)": 0.5087842869586169,
      "MotionCritic": -12.364187240600586,
      "VeMo (human-opt view)": 0.7424400417101147,
      "VeMo (max entropy view)": 0.7424400417101147,
      "VeMo (min entropy view)": 0.7975708502024291,
      "VeMo (random view)": 0.7424400417101147,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks and then puts both arms out while bowing."
  },
  "010280": {
    "text": "a man scratches his head with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.22905178152886668,
      "Minus Multimodal Distance": -11.765946388244629,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.447807805263437e-05,
      "MoBERT-F": 0.2867467240784287,
      "MoBERT-N": 0.3568077736554831,
      "MoBERT-min(F/N)": 0.2867467240784287,
      "MoBERT-max(F/N)": 0.3568077736554831,
      "MotionCritic": -4.1659698486328125,
      "VeMo (human-opt view)": 0.7545304777594728,
      "VeMo (max entropy view)": 0.7545304777594728,
      "VeMo (min entropy view)": 0.980798701884071,
      "VeMo (random view)": 0.7545304777594728,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man scratches his head with his right hand."
  },
  "011988": {
    "text": "this person bends forward as if to bow.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.25125784631399123,
      "Minus Multimodal Distance": -10.380385398864746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.09643210470676422,
      "MoBERT-F": 0.6354741162485826,
      "MoBERT-N": 0.660521221740379,
      "MoBERT-min(F/N)": 0.6354741162485826,
      "MoBERT-max(F/N)": 0.660521221740379,
      "MotionCritic": -10.002647399902344,
      "VeMo (human-opt view)": 0.9993733953653438,
      "VeMo (max entropy view)": 0.9970266761589681,
      "VeMo (min entropy view)": 0.9993733953653438,
      "VeMo (random view)": 0.9970266761589681,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person bends forward as if to bow."
  },
  "012601": {
    "text": "the person sat down and crossed his legs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3732535871480973,
      "Minus Multimodal Distance": -12.496496200561523,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.078052759519778e-05,
      "MoBERT-F": 0.3342413279018046,
      "MoBERT-N": 0.5261070606299616,
      "MoBERT-min(F/N)": 0.3342413279018046,
      "MoBERT-max(F/N)": 0.5261070606299616,
      "MotionCritic": -4.223310947418213,
      "VeMo (human-opt view)": 0.9604249667994688,
      "VeMo (max entropy view)": 0.9100794646591385,
      "VeMo (min entropy view)": 0.9604249667994688,
      "VeMo (random view)": 0.9100794646591385,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person sat down and crossed his legs."
  },
  "007812": {
    "text": "a person grabbedsomething and lean it somehwere",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.39939014860379374,
      "Minus Multimodal Distance": -8.04739761352539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6778907340485603e-05,
      "MoBERT-F": 0.34838352649954457,
      "MoBERT-N": 0.4188889890710632,
      "MoBERT-min(F/N)": 0.34838352649954457,
      "MoBERT-max(F/N)": 0.4188889890710632,
      "MotionCritic": -4.201788425445557,
      "VeMo (human-opt view)": 0.28180354267310787,
      "VeMo (max entropy view)": 0.5166051660516605,
      "VeMo (min entropy view)": 0.28180354267310787,
      "VeMo (random view)": 0.5166051660516605,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person grabbed something and left it somewhere."
  },
  "010520": {
    "text": "a person strafes to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.262240123452353,
      "Minus Multimodal Distance": -2.5156960487365723,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2813545001554303e-05,
      "MoBERT-F": 0.41606022213741306,
      "MoBERT-N": 0.4830403421161051,
      "MoBERT-min(F/N)": 0.41606022213741306,
      "MoBERT-max(F/N)": 0.4830403421161051,
      "MotionCritic": -3.9969515800476074,
      "VeMo (human-opt view)": 0.9433478595231992,
      "VeMo (max entropy view)": 0.9400402414486921,
      "VeMo (min entropy view)": 0.9433478595231992,
      "VeMo (random view)": 0.9400402414486921,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves sideways to the right."
  },
  "014506": {
    "text": "figure walks forward, raises foot to step up and over, uses other foot to drop down, walks forward and stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5883934356155184,
      "Minus Multimodal Distance": -2.1271913051605225,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5343522793264128e-05,
      "MoBERT-F": 0.33423469267334377,
      "MoBERT-N": 0.4880242188326816,
      "MoBERT-min(F/N)": 0.33423469267334377,
      "MoBERT-max(F/N)": 0.4880242188326816,
      "MotionCritic": -2.7444863319396973,
      "VeMo (human-opt view)": 0.9098712446351931,
      "VeMo (max entropy view)": 0.8670181605155243,
      "VeMo (min entropy view)": 0.9098712446351931,
      "VeMo (random view)": 0.9098712446351931,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The figure walks forward, raises a foot to step up and over, uses the other foot to drop down, walks forward, and then stops."
  },
  "010915": {
    "text": "a person enacting a mime with an invisible barrier in front of them touching with both hands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3452867185202929,
      "Minus Multimodal Distance": -6.679506301879883,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.3377185395220295e-05,
      "MoBERT-F": 0.3386263308743806,
      "MoBERT-N": 0.36235894431804466,
      "MoBERT-min(F/N)": 0.3386263308743806,
      "MoBERT-max(F/N)": 0.36235894431804466,
      "MotionCritic": -5.673184871673584,
      "VeMo (human-opt view)": 0.9919363284113519,
      "VeMo (max entropy view)": 0.9604802401200601,
      "VeMo (min entropy view)": 0.9919363284113519,
      "VeMo (random view)": 0.9604802401200601,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is enacting a mime, touching an invisible barrier in front of them with both hands."
  },
  "005481": {
    "text": "a figure walks confidently forward then raises their hand as if to access a door nob",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6326481964224443,
      "Minus Multimodal Distance": -9.296639442443848,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.776076381560415e-05,
      "MoBERT-F": 0.24305570825469358,
      "MoBERT-N": 0.4065469370815877,
      "MoBERT-min(F/N)": 0.24305570825469358,
      "MoBERT-max(F/N)": 0.4065469370815877,
      "MotionCritic": -5.2945051193237305,
      "VeMo (human-opt view)": 0.7548076923076923,
      "VeMo (max entropy view)": 0.7425414364640884,
      "VeMo (min entropy view)": 0.7548076923076923,
      "VeMo (random view)": 0.7425414364640884,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks confidently forward, then raises their hand as if to reach for a doorknob."
  },
  "011223": {
    "text": "this person waves his right arm up and down as if to enjoy a beat.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.27298376875583774,
      "Minus Multimodal Distance": -7.567451000213623,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.240473961341195e-05,
      "MoBERT-F": 0.28120831097169363,
      "MoBERT-N": 0.3961996098755101,
      "MoBERT-min(F/N)": 0.28120831097169363,
      "MoBERT-max(F/N)": 0.3961996098755101,
      "MotionCritic": -8.624494552612305,
      "VeMo (human-opt view)": 0.7194570135746606,
      "VeMo (max entropy view)": 0.7194570135746606,
      "VeMo (min entropy view)": 0.9048275862068965,
      "VeMo (random view)": 0.7194570135746606,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person waves his right arm up and down as if to enjoy a beat."
  },
  "001859": {
    "text": "a person walks forward, leans over an object, then stands back up and walks backward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9274760055018931,
      "Minus Multimodal Distance": -6.339864253997803,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3858983814716339,
      "MoBERT-F": 0.5403605850578457,
      "MoBERT-N": 0.5657683164664654,
      "MoBERT-min(F/N)": 0.5403605850578457,
      "MoBERT-max(F/N)": 0.5657683164664654,
      "MotionCritic": -5.579909801483154,
      "VeMo (human-opt view)": 0.8594594594594595,
      "VeMo (max entropy view)": 0.6365007541478129,
      "VeMo (min entropy view)": 0.8594594594594595,
      "VeMo (random view)": 0.6365007541478129,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, leans over an object, then stands back up and walks backward."
  },
  "011158": {
    "text": "a man jogs forward a few steps, walks back to his spot and squats down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0708188021203306,
      "Minus Multimodal Distance": -2.0662145614624023,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9760730266571045,
      "MoBERT-F": 0.6165241766284202,
      "MoBERT-N": 0.6021900509341975,
      "MoBERT-min(F/N)": 0.6021900509341975,
      "MoBERT-max(F/N)": 0.6165241766284202,
      "MotionCritic": -7.737785816192627,
      "VeMo (human-opt view)": 0.8869690424766019,
      "VeMo (max entropy view)": 0.8869690424766019,
      "VeMo (min entropy view)": 0.8990825688073395,
      "VeMo (random view)": 0.8990825688073395,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man jogs forward a few steps, walks back to his spot, and squats down."
  },
  "014200": {
    "text": "a person sits on a stool of some sort and brushes off their legs.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7223945176688366,
      "Minus Multimodal Distance": -6.8152031898498535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.09373917430639267,
      "MoBERT-F": 0.49547660137822624,
      "MoBERT-N": 0.41843983878527713,
      "MoBERT-min(F/N)": 0.41843983878527713,
      "MoBERT-max(F/N)": 0.49547660137822624,
      "MotionCritic": -5.364747524261475,
      "VeMo (human-opt view)": 0.00016867545881781815,
      "VeMo (max entropy view)": 0.00026172082915511225,
      "VeMo (min entropy view)": 0.00016867545881781815,
      "VeMo (random view)": 0.00016867545881781815,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits on some sort of stool and brushes their legs."
  },
  "013998": {
    "text": "a man raises his left hand to this face then lowers it back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.1662972146455608,
      "Minus Multimodal Distance": -6.125868797302246,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.157752507831901e-05,
      "MoBERT-F": 0.26267830300061956,
      "MoBERT-N": 0.401249922050768,
      "MoBERT-min(F/N)": 0.26267830300061956,
      "MoBERT-max(F/N)": 0.401249922050768,
      "MotionCritic": -14.958375930786133,
      "VeMo (human-opt view)": 0.9101251422070534,
      "VeMo (max entropy view)": 0.9101251422070534,
      "VeMo (min entropy view)": 0.9602053915275995,
      "VeMo (random view)": 0.9101251422070534,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man raises his left hand to his face then lowers it back down."
  },
  "010499": {
    "text": "this person steps forward then sits on a stool to brush off his shoe.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5454698431636273,
      "Minus Multimodal Distance": -9.681527137756348,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9388443231582642,
      "MoBERT-F": 0.6140774076068287,
      "MoBERT-N": 0.5753502869807564,
      "MoBERT-min(F/N)": 0.5753502869807564,
      "MoBERT-max(F/N)": 0.6140774076068287,
      "MotionCritic": -7.7343668937683105,
      "VeMo (human-opt view)": 0.04477611940298507,
      "VeMo (max entropy view)": 0.04477611940298507,
      "VeMo (min entropy view)": 0.007089832346317457,
      "VeMo (random view)": 0.007089832346317457,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person steps forward and then sits on a stool to brush off his shoes."
  },
  "007999": {
    "text": "a person doges to the left, then doges to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9090993342759462,
      "Minus Multimodal Distance": -9.511341094970703,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.995902955532074,
      "MoBERT-F": 0.7046867985552537,
      "MoBERT-N": 0.656553177912105,
      "MoBERT-min(F/N)": 0.656553177912105,
      "MoBERT-max(F/N)": 0.7046867985552537,
      "MotionCritic": -5.80557107925415,
      "VeMo (human-opt view)": 0.8268551236749117,
      "VeMo (max entropy view)": 0.8268551236749117,
      "VeMo (min entropy view)": 0.8522727272727273,
      "VeMo (random view)": 0.8522727272727273,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person dodges to the left, then dodges to the right."
  },
  "001038": {
    "text": "a person warming up by swinging back and arms around.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8918438728639003,
      "Minus Multimodal Distance": -12.5676851272583,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9538938403129578,
      "MoBERT-F": 0.8360549270176905,
      "MoBERT-N": 0.6600916471759689,
      "MoBERT-min(F/N)": 0.6600916471759689,
      "MoBERT-max(F/N)": 0.8360549270176905,
      "MotionCritic": -10.037485122680664,
      "VeMo (human-opt view)": 0.986784140969163,
      "VeMo (max entropy view)": 0.9783743475018642,
      "VeMo (min entropy view)": 0.986784140969163,
      "VeMo (random view)": 0.986784140969163,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is warming up by swinging their arms back and around."
  },
  "014351": {
    "text": "the person bent down and dodge something towards the left.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7957477283795951,
      "Minus Multimodal Distance": -5.743102073669434,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5518321394920349,
      "MoBERT-F": 0.6899067625784847,
      "MoBERT-N": 0.6355295097869543,
      "MoBERT-min(F/N)": 0.6355295097869543,
      "MoBERT-max(F/N)": 0.6899067625784847,
      "MotionCritic": -7.268082141876221,
      "VeMo (human-opt view)": 0.9527378721632314,
      "VeMo (max entropy view)": 0.9467146126185028,
      "VeMo (min entropy view)": 0.9527378721632314,
      "VeMo (random view)": 0.9467146126185028,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person bent down and dodged something towards the left."
  },
  "012343": {
    "text": "the sim appears to scoot across the plane.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.38684380009322833,
      "Minus Multimodal Distance": -9.054919242858887,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.491359504754655e-05,
      "MoBERT-F": 0.2671909581058078,
      "MoBERT-N": 0.4119209990966081,
      "MoBERT-min(F/N)": 0.2671909581058078,
      "MoBERT-max(F/N)": 0.4119209990966081,
      "MotionCritic": -4.601071834564209,
      "VeMo (human-opt view)": 0.6926829268292682,
      "VeMo (max entropy view)": 0.48534201954397393,
      "VeMo (min entropy view)": 0.6926829268292682,
      "VeMo (random view)": 0.48534201954397393,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The SIM appears to scoot across the plane."
  },
  "007776": {
    "text": "the person is shaking out both her arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5372410836622531,
      "Minus Multimodal Distance": -6.374590873718262,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.396436270326376e-05,
      "MoBERT-F": 0.3677478471691003,
      "MoBERT-N": 0.4442513558990418,
      "MoBERT-min(F/N)": 0.3677478471691003,
      "MoBERT-max(F/N)": 0.4442513558990418,
      "MotionCritic": -5.1753411293029785,
      "VeMo (human-opt view)": 0.9048473967684022,
      "VeMo (max entropy view)": 0.5936395759717314,
      "VeMo (min entropy view)": 0.9048473967684022,
      "VeMo (random view)": 0.9048473967684022,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is shaking out both her arms."
  },
  "003902": {
    "text": "the person checks the watch on his hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.31972132693330285,
      "Minus Multimodal Distance": -9.940912246704102,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.732965605100617e-05,
      "MoBERT-F": 0.2582291734231494,
      "MoBERT-N": 0.3409536228885665,
      "MoBERT-min(F/N)": 0.2582291734231494,
      "MoBERT-max(F/N)": 0.3409536228885665,
      "MotionCritic": -7.190652370452881,
      "VeMo (human-opt view)": 0.7058823529411765,
      "VeMo (max entropy view)": 0.3629976580796253,
      "VeMo (min entropy view)": 0.7058823529411765,
      "VeMo (random view)": 0.3629976580796253,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person checks the watch on his hand."
  },
  "011307": {
    "text": "person walks forward, stumbles back, and continues forward",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.33964367262397566,
      "Minus Multimodal Distance": -5.554260730743408,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9874088764190674,
      "MoBERT-F": 0.6287063817018499,
      "MoBERT-N": 0.7088553858075294,
      "MoBERT-min(F/N)": 0.6287063817018499,
      "MoBERT-max(F/N)": 0.7088553858075294,
      "MotionCritic": -4.651103496551514,
      "VeMo (human-opt view)": 0.8806539509536785,
      "VeMo (max entropy view)": 0.7544783983140148,
      "VeMo (min entropy view)": 0.8806539509536785,
      "VeMo (random view)": 0.7544783983140148,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, stumbles back, and continues forward."
  },
  "002781": {
    "text": "a man walks around in a complete circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8390147154501104,
      "Minus Multimodal Distance": -10.573978424072266,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3564891219139099,
      "MoBERT-F": 0.6271805542417974,
      "MoBERT-N": 0.605706627462506,
      "MoBERT-min(F/N)": 0.605706627462506,
      "MoBERT-max(F/N)": 0.6271805542417974,
      "MotionCritic": -12.069372177124023,
      "VeMo (human-opt view)": 0.7880794701986755,
      "VeMo (max entropy view)": 0.7774193548387097,
      "VeMo (min entropy view)": 0.7880794701986755,
      "VeMo (random view)": 0.7880794701986755,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks around in a complete circle."
  },
  "001589": {
    "text": "a person runs on the spot.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3213879617629735,
      "Minus Multimodal Distance": -5.564660549163818,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.3472077373298816e-05,
      "MoBERT-F": 0.4207027757670194,
      "MoBERT-N": 0.5640666897488884,
      "MoBERT-min(F/N)": 0.4207027757670194,
      "MoBERT-max(F/N)": 0.5640666897488884,
      "MotionCritic": -6.913619041442871,
      "VeMo (human-opt view)": 0.7428571428571429,
      "VeMo (max entropy view)": 0.7428571428571429,
      "VeMo (min entropy view)": 0.004057728508680193,
      "VeMo (random view)": 0.7428571428571429,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs on the spot."
  },
  "004553": {
    "text": "a person walks forward, picks something up, puts their hand to their mouth and tilts back their head, then puts it down",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4994571524037772,
      "Minus Multimodal Distance": -4.654053211212158,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.03201286442345e-05,
      "MoBERT-F": 0.2987871919045803,
      "MoBERT-N": 0.43892591243429424,
      "MoBERT-min(F/N)": 0.2987871919045803,
      "MoBERT-max(F/N)": 0.43892591243429424,
      "MotionCritic": -4.840447425842285,
      "VeMo (human-opt view)": 0.6656394453004623,
      "VeMo (max entropy view)": 0.6656394453004623,
      "VeMo (min entropy view)": 0.004896764097127569,
      "VeMo (random view)": 0.004896764097127569,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, picks something up, puts their hand to their mouth, tilts their head back, and then puts it down."
  },
  "000076": {
    "text": "man reaches down to the left as to pick up item and then reaches to the right as if emptying item then replaces it to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.277378183019448,
      "Minus Multimodal Distance": -9.044611930847168,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3524964490206912e-05,
      "MoBERT-F": 0.3242055640148256,
      "MoBERT-N": 0.48262010729306004,
      "MoBERT-min(F/N)": 0.3242055640148256,
      "MoBERT-max(F/N)": 0.48262010729306004,
      "MotionCritic": -2.5446486473083496,
      "VeMo (human-opt view)": 0.8931245745405038,
      "VeMo (max entropy view)": 0.8267716535433071,
      "VeMo (min entropy view)": 0.8931245745405038,
      "VeMo (random view)": 0.8267716535433071,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The man reaches down to the left as if to pick up an item, then reaches to the right as if emptying the item, and then replaces it to the left."
  },
  "010526": {
    "text": "a person makes a toast, then raises a glass to take a drink and wipes his lips with a napkin.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3471773762028342,
      "Minus Multimodal Distance": -7.853667736053467,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.553296119207516e-05,
      "MoBERT-F": 0.28343908926242806,
      "MoBERT-N": 0.4264295051717835,
      "MoBERT-min(F/N)": 0.28343908926242806,
      "MoBERT-max(F/N)": 0.4264295051717835,
      "MotionCritic": -2.3704662322998047,
      "VeMo (human-opt view)": 3.5374846601456505e-05,
      "VeMo (max entropy view)": 4.978935888972942e-05,
      "VeMo (min entropy view)": 3.5374846601456505e-05,
      "VeMo (random view)": 4.978935888972942e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person makes a toast, then raises a glass to take a drink and wipes their lips with a napkin."
  },
  "012529": {
    "text": "a person is dancing the waltz, going in a counter-clockwise direction with the left arm out.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7970906002142426,
      "Minus Multimodal Distance": -8.836812019348145,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0053294263780117035,
      "MoBERT-F": 0.5743842897653535,
      "MoBERT-N": 0.4828627119383825,
      "MoBERT-min(F/N)": 0.4828627119383825,
      "MoBERT-max(F/N)": 0.5743842897653535,
      "MotionCritic": -3.284585475921631,
      "VeMo (human-opt view)": 0.6521739130434783,
      "VeMo (max entropy view)": 0.6521739130434783,
      "VeMo (min entropy view)": 0.7317073170731707,
      "VeMo (random view)": 0.7317073170731707,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is dancing the waltz, going in a counter-clockwise direction with the left arm out."
  },
  "004346": {
    "text": "a person is stationary moving their arms in symmetrical motion as if they are swimming.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2934655877968195,
      "Minus Multimodal Distance": -9.138647079467773,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00010634333739290014,
      "MoBERT-F": 0.3298799250352718,
      "MoBERT-N": 0.37727109161307176,
      "MoBERT-min(F/N)": 0.3298799250352718,
      "MoBERT-max(F/N)": 0.37727109161307176,
      "MotionCritic": -10.949993133544922,
      "VeMo (human-opt view)": 0.9044967880085653,
      "VeMo (max entropy view)": 0.8934269304403318,
      "VeMo (min entropy view)": 0.9044967880085653,
      "VeMo (random view)": 0.8934269304403318,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is stationary, moving their arms in a symmetrical motion as if they are swimming."
  },
  "003584": {
    "text": "a person moves something out of the way in a effort to advance forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7696200684026278,
      "Minus Multimodal Distance": -8.85951042175293,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 7.551698945462704e-05,
      "MoBERT-F": 0.2856197612707385,
      "MoBERT-N": 0.3607551151370291,
      "MoBERT-min(F/N)": 0.2856197612707385,
      "MoBERT-max(F/N)": 0.3607551151370291,
      "MotionCritic": -1.1978881359100342,
      "VeMo (human-opt view)": 0.4226190476190476,
      "VeMo (max entropy view)": 0.4835164835164835,
      "VeMo (min entropy view)": 0.4226190476190476,
      "VeMo (random view)": 0.4835164835164835,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves something out of the way in an effort to move forward."
  },
  "011825": {
    "text": "a person jogs diagonally to his right and then slows down to a walk.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8179118548428556,
      "Minus Multimodal Distance": -3.3816635608673096,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.14037013053894043,
      "MoBERT-F": 0.6247325666312534,
      "MoBERT-N": 0.6751490786004757,
      "MoBERT-min(F/N)": 0.6247325666312534,
      "MoBERT-max(F/N)": 0.6751490786004757,
      "MotionCritic": -8.719718933105469,
      "VeMo (human-opt view)": 0.9646945026600032,
      "VeMo (max entropy view)": 0.9524655708573967,
      "VeMo (min entropy view)": 0.9646945026600032,
      "VeMo (random view)": 0.9524655708573967,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person jogs diagonally to their right and then slows down to a walk."
  },
  "007157": {
    "text": "a person makes several hand gestures and appears to move objects around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.34648891637609436,
      "Minus Multimodal Distance": -7.227734088897705,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8927321181981824e-05,
      "MoBERT-F": 0.31149286110416363,
      "MoBERT-N": 0.41621193215314645,
      "MoBERT-min(F/N)": 0.31149286110416363,
      "MoBERT-max(F/N)": 0.41621193215314645,
      "MotionCritic": -8.542545318603516,
      "VeMo (human-opt view)": 0.8931245745405038,
      "VeMo (max entropy view)": 0.7880386983289358,
      "VeMo (min entropy view)": 0.8931245745405038,
      "VeMo (random view)": 0.8931245745405038,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person makes several hand gestures and appears to move objects around."
  },
  "000886": {
    "text": "a person sits down in a chair and then gets back up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.722423157662356,
      "Minus Multimodal Distance": -7.9225873947143555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.030035804957151413,
      "MoBERT-F": 0.3933844490094246,
      "MoBERT-N": 0.431123385108932,
      "MoBERT-min(F/N)": 0.3933844490094246,
      "MoBERT-max(F/N)": 0.431123385108932,
      "MotionCritic": -8.204191207885742,
      "VeMo (human-opt view)": 0.021567021567021567,
      "VeMo (max entropy view)": 0.16423712342079688,
      "VeMo (min entropy view)": 0.021567021567021567,
      "VeMo (random view)": 0.021567021567021567,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down in a chair and then gets back up."
  },
  "014049": {
    "text": "person stretches both arms up and then put arms down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3848046391414787,
      "Minus Multimodal Distance": -10.233847618103027,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.2210329663939774e-05,
      "MoBERT-F": 0.3741894479612482,
      "MoBERT-N": 0.47197206474755166,
      "MoBERT-min(F/N)": 0.3741894479612482,
      "MoBERT-max(F/N)": 0.47197206474755166,
      "MotionCritic": -7.827596187591553,
      "VeMo (human-opt view)": 0.9998086212132499,
      "VeMo (max entropy view)": 0.9996853392285163,
      "VeMo (min entropy view)": 0.9998086212132499,
      "VeMo (random view)": 0.9998086212132499,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stretches both arms up and then puts the arms down."
  },
  "006693": {
    "text": "a person runs forward to throw with the right arm",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7066937917659676,
      "Minus Multimodal Distance": -4.375535488128662,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.987143337726593,
      "MoBERT-F": 0.8234997780421486,
      "MoBERT-N": 0.8923286655678585,
      "MoBERT-min(F/N)": 0.8234997780421486,
      "MoBERT-max(F/N)": 0.8923286655678585,
      "MotionCritic": -8.908731460571289,
      "VeMo (human-opt view)": 0.17304189435336975,
      "VeMo (max entropy view)": 0.5772946859903382,
      "VeMo (min entropy view)": 0.17304189435336975,
      "VeMo (random view)": 0.5772946859903382,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person runs forward and throws with the right arm."
  },
  "003751": {
    "text": "a person placing something from left to right",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.12020923742432128,
      "Minus Multimodal Distance": -5.4726152420043945,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.373567960807122e-05,
      "MoBERT-F": 0.3120083133000719,
      "MoBERT-N": 0.3684276796901309,
      "MoBERT-min(F/N)": 0.3120083133000719,
      "MoBERT-max(F/N)": 0.3684276796901309,
      "MotionCritic": -3.012413501739502,
      "VeMo (human-opt view)": 0.2683513838748496,
      "VeMo (max entropy view)": 0.46853146853146854,
      "VeMo (min entropy view)": 0.2683513838748496,
      "VeMo (random view)": 0.2683513838748496,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person places something from left to right."
  },
  "013207": {
    "text": "a person sprinting ahead, and then slowing down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9194739499056338,
      "Minus Multimodal Distance": -4.527398586273193,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.948485002387315e-05,
      "MoBERT-F": 0.4474956636859213,
      "MoBERT-N": 0.5342370215506476,
      "MoBERT-min(F/N)": 0.4474956636859213,
      "MoBERT-max(F/N)": 0.5342370215506476,
      "MotionCritic": -7.816012382507324,
      "VeMo (human-opt view)": 0.9956601133587877,
      "VeMo (max entropy view)": 0.9908764169200995,
      "VeMo (min entropy view)": 0.9956601133587877,
      "VeMo (random view)": 0.9908764169200995,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sprints ahead and then slows down."
  },
  "011993": {
    "text": "a person appears to be playing the violin.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3117861675004542,
      "Minus Multimodal Distance": -7.419189929962158,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7200576369068585e-05,
      "MoBERT-F": 0.3433044520851102,
      "MoBERT-N": 0.3797148941909021,
      "MoBERT-min(F/N)": 0.3433044520851102,
      "MoBERT-max(F/N)": 0.3797148941909021,
      "MotionCritic": -8.364187240600586,
      "VeMo (human-opt view)": 6.3435012801014135e-06,
      "VeMo (max entropy view)": 1.7871276020604013e-05,
      "VeMo (min entropy view)": 6.3435012801014135e-06,
      "VeMo (random view)": 1.7871276020604013e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person appears to be playing the violin."
  },
  "002103": {
    "text": "a man steps forward and does a handstand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8167441519441808,
      "Minus Multimodal Distance": -7.837844371795654,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9780811667442322,
      "MoBERT-F": 0.6298486703510748,
      "MoBERT-N": 0.591643102836854,
      "MoBERT-min(F/N)": 0.591643102836854,
      "MoBERT-max(F/N)": 0.6298486703510748,
      "MotionCritic": -6.238525867462158,
      "VeMo (human-opt view)": 0.9924420119885327,
      "VeMo (max entropy view)": 0.9924420119885327,
      "VeMo (min entropy view)": 0.9933157259773713,
      "VeMo (random view)": 0.9933157259773713,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man steps forward and does a handstand."
  },
  "003900": {
    "text": "a person bends down as if pushed.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.281628077279175,
      "Minus Multimodal Distance": -9.573363304138184,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6182126021012664e-05,
      "MoBERT-F": 0.4324064928576894,
      "MoBERT-N": 0.5119116769398402,
      "MoBERT-min(F/N)": 0.4324064928576894,
      "MoBERT-max(F/N)": 0.5119116769398402,
      "MotionCritic": -6.211084842681885,
      "VeMo (human-opt view)": 0.9919110836677987,
      "VeMo (max entropy view)": 0.9808885579548275,
      "VeMo (min entropy view)": 0.9919110836677987,
      "VeMo (random view)": 0.9808885579548275,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends down as if pushed."
  },
  "000299": {
    "text": "a person walks backwards and then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2730030422615563,
      "Minus Multimodal Distance": -9.38129711151123,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9930023550987244,
      "MoBERT-F": 0.7545076427883903,
      "MoBERT-N": 0.5724964820553811,
      "MoBERT-min(F/N)": 0.5724964820553811,
      "MoBERT-max(F/N)": 0.7545076427883903,
      "MotionCritic": -6.8718743324279785,
      "VeMo (human-opt view)": 0.8873949579831932,
      "VeMo (max entropy view)": 0.8873949579831932,
      "VeMo (min entropy view)": 0.9883105697059186,
      "VeMo (random view)": 0.9883105697059186,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks backwards and then stops."
  },
  "005048": {
    "text": "person walking very slowly towards something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.22859716746280512,
      "Minus Multimodal Distance": -10.981160163879395,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.236030847299844e-05,
      "MoBERT-F": 0.36772513473227025,
      "MoBERT-N": 0.414450820033188,
      "MoBERT-min(F/N)": 0.36772513473227025,
      "MoBERT-max(F/N)": 0.414450820033188,
      "MotionCritic": -7.792259693145752,
      "VeMo (human-opt view)": 0.9498111171073934,
      "VeMo (max entropy view)": 0.9498111171073934,
      "VeMo (min entropy view)": 0.9687956446687027,
      "VeMo (random view)": 0.9498111171073934,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking very slowly towards something."
  },
  "011583": {
    "text": "raising and lowering arms.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7399229503117059,
      "Minus Multimodal Distance": -3.1675198078155518,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.709412339958362e-05,
      "MoBERT-F": 0.47161177329732185,
      "MoBERT-N": 0.4818019836546992,
      "MoBERT-min(F/N)": 0.47161177329732185,
      "MoBERT-max(F/N)": 0.4818019836546992,
      "MotionCritic": -15.282838821411133,
      "VeMo (human-opt view)": 0.9985003665023571,
      "VeMo (max entropy view)": 0.9985003665023571,
      "VeMo (min entropy view)": 0.9988346698662891,
      "VeMo (random view)": 0.9988346698662891,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is raising and lowering their arms."
  },
  "000633": {
    "text": "a person stands while moving their right arm as if eating something with a spoon or fork three times.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8716337723598427,
      "Minus Multimodal Distance": -4.027215480804443,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4037120965658687e-05,
      "MoBERT-F": 0.365230921373124,
      "MoBERT-N": 0.36237006598536137,
      "MoBERT-min(F/N)": 0.36237006598536137,
      "MoBERT-max(F/N)": 0.365230921373124,
      "MotionCritic": -7.1277241706848145,
      "VeMo (human-opt view)": 0.01795862696067288,
      "VeMo (max entropy view)": 0.01795862696067288,
      "VeMo (min entropy view)": 0.0011705033164260631,
      "VeMo (random view)": 0.01795862696067288,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands while moving their right arm as if eating something with a spoon or fork three times."
  },
  "007946": {
    "text": "a person is sitting with the chin in hand, stands up, and then returns to sitting",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0183359855388916,
      "Minus Multimodal Distance": -8.726130485534668,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0030821312684565783,
      "MoBERT-F": 0.3370381269546745,
      "MoBERT-N": 0.3741738858388136,
      "MoBERT-min(F/N)": 0.3370381269546745,
      "MoBERT-max(F/N)": 0.3741738858388136,
      "MotionCritic": -12.023189544677734,
      "VeMo (human-opt view)": 0.6512345679012346,
      "VeMo (max entropy view)": 0.6512345679012346,
      "VeMo (min entropy view)": 0.26875852660300137,
      "VeMo (random view)": 0.26875852660300137,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting with the chin in hand, stands up, and then returns to sitting."
  },
  "002259": {
    "text": "a person stands using his left hand to play a guitar moving it up and down.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.17143946690918607,
      "Minus Multimodal Distance": -6.986948013305664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.901617881434504e-05,
      "MoBERT-F": 0.28719182955157063,
      "MoBERT-N": 0.39145062361347854,
      "MoBERT-min(F/N)": 0.28719182955157063,
      "MoBERT-max(F/N)": 0.39145062361347854,
      "MotionCritic": -6.042672634124756,
      "VeMo (human-opt view)": 8.37107114636945e-06,
      "VeMo (max entropy view)": 9.564294421268692e-06,
      "VeMo (min entropy view)": 8.37107114636945e-06,
      "VeMo (random view)": 9.564294421268692e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stands and uses his left hand to play the guitar, moving it up and down."
  },
  "000972": {
    "text": "this person does a short sprint forward, holding their arms up to their chest level.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.39825571953553696,
      "Minus Multimodal Distance": -9.795478820800781,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.703538434114307e-05,
      "MoBERT-F": 0.3031937030711581,
      "MoBERT-N": 0.4197913263506973,
      "MoBERT-min(F/N)": 0.3031937030711581,
      "MoBERT-max(F/N)": 0.4197913263506973,
      "MotionCritic": -6.330256938934326,
      "VeMo (human-opt view)": 0.3778801843317972,
      "VeMo (max entropy view)": 0.3778801843317972,
      "VeMo (min entropy view)": 0.11273613650213285,
      "VeMo (random view)": 0.11273613650213285,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person does a short sprint forward, holding their arms up to chest level."
  },
  "004759": {
    "text": "a person is walking in a stumbling motion and puts up one of his hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2319547345206985,
      "Minus Multimodal Distance": -10.439095497131348,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0007013585418462753,
      "MoBERT-F": 0.5256218680982889,
      "MoBERT-N": 0.580803510991231,
      "MoBERT-min(F/N)": 0.5256218680982889,
      "MoBERT-max(F/N)": 0.580803510991231,
      "MotionCritic": -7.563921928405762,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.39280125195618154,
      "VeMo (random view)": 0.39280125195618154,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking in a stumbling motion and puts up one of his hands."
  },
  "002209": {
    "text": "a man walks in a forward arc, skewing to the right side of the screen.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8026189739260515,
      "Minus Multimodal Distance": -10.04995059967041,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.385554944339674e-05,
      "MoBERT-F": 0.3609089280029356,
      "MoBERT-N": 0.5010767746581708,
      "MoBERT-min(F/N)": 0.3609089280029356,
      "MoBERT-max(F/N)": 0.5010767746581708,
      "MotionCritic": -1.3328889608383179,
      "VeMo (human-opt view)": 0.939896654881697,
      "VeMo (max entropy view)": 0.9363316174478461,
      "VeMo (min entropy view)": 0.939896654881697,
      "VeMo (random view)": 0.9363316174478461,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks in a forward arc, skewing to the right side of the screen."
  },
  "002514": {
    "text": "a person holds both their hands up as if to look at something closely.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.22353277375730377,
      "Minus Multimodal Distance": -4.300605773925781,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.36741613864433e-05,
      "MoBERT-F": 0.33397809021385494,
      "MoBERT-N": 0.39963736655461174,
      "MoBERT-min(F/N)": 0.33397809021385494,
      "MoBERT-max(F/N)": 0.39963736655461174,
      "MotionCritic": -8.568587303161621,
      "VeMo (human-opt view)": 0.9796699354221479,
      "VeMo (max entropy view)": 0.7773851590106007,
      "VeMo (min entropy view)": 0.9796699354221479,
      "VeMo (random view)": 0.7773851590106007,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds both hands up as if to look at something closely."
  },
  "012362": {
    "text": "a person holds their hands together in front of themself.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3545520711001344,
      "Minus Multimodal Distance": -10.943404197692871,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.248523666523397e-05,
      "MoBERT-F": 0.4821083877085862,
      "MoBERT-N": 0.48194106056209673,
      "MoBERT-min(F/N)": 0.48194106056209673,
      "MoBERT-max(F/N)": 0.4821083877085862,
      "MotionCritic": -1.9625269174575806,
      "VeMo (human-opt view)": 0.9914510298927075,
      "VeMo (max entropy view)": 0.6223277909738717,
      "VeMo (min entropy view)": 0.9914510298927075,
      "VeMo (random view)": 0.9914510298927075,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds their hands together in front of themselves."
  },
  "013641": {
    "text": "person bends knees then slightly jumps into the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3950825627198199,
      "Minus Multimodal Distance": -11.297307968139648,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.17979508638381958,
      "MoBERT-F": 0.6934454394902294,
      "MoBERT-N": 0.6340504750535223,
      "MoBERT-min(F/N)": 0.6340504750535223,
      "MoBERT-max(F/N)": 0.6934454394902294,
      "MotionCritic": -5.009305000305176,
      "VeMo (human-opt view)": 0.6510538641686182,
      "VeMo (max entropy view)": 0.3627254509018036,
      "VeMo (min entropy view)": 0.6510538641686182,
      "VeMo (random view)": 0.6510538641686182,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person bends their knees and then slightly jumps into the air."
  },
  "006137": {
    "text": "a person propels himself and takes a long jump",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8837200021031343,
      "Minus Multimodal Distance": -5.822187423706055,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.14760631322860718,
      "MoBERT-F": 0.770054372457359,
      "MoBERT-N": 0.7558192368858154,
      "MoBERT-min(F/N)": 0.7558192368858154,
      "MoBERT-max(F/N)": 0.770054372457359,
      "MotionCritic": -4.800889015197754,
      "VeMo (human-opt view)": 0.050102951269732326,
      "VeMo (max entropy view)": 0.050102951269732326,
      "VeMo (min entropy view)": 0.02029708583739653,
      "VeMo (random view)": 0.050102951269732326,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person propels himself and takes a long jump."
  },
  "002171": {
    "text": "a person who sits down on there knees",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6414323399741,
      "Minus Multimodal Distance": -7.544463634490967,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8048287276760675e-05,
      "MoBERT-F": 0.2999980374838741,
      "MoBERT-N": 0.4165052610423287,
      "MoBERT-min(F/N)": 0.2999980374838741,
      "MoBERT-max(F/N)": 0.4165052610423287,
      "MotionCritic": -5.728201389312744,
      "VeMo (human-opt view)": 0.9947900175585044,
      "VeMo (max entropy view)": 0.9947900175585044,
      "VeMo (min entropy view)": 0.9966301777005672,
      "VeMo (random view)": 0.9947900175585044,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who gets down on their knees"
  },
  "009181": {
    "text": "a person does a swimming motion while standing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.164127707014283,
      "Minus Multimodal Distance": -10.20528507232666,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00011296260345261544,
      "MoBERT-F": 0.5498580923050669,
      "MoBERT-N": 0.5613246858384969,
      "MoBERT-min(F/N)": 0.5498580923050669,
      "MoBERT-max(F/N)": 0.5613246858384969,
      "MotionCritic": -6.033971309661865,
      "VeMo (human-opt view)": 0.7555555555555555,
      "VeMo (max entropy view)": 0.5776566757493188,
      "VeMo (min entropy view)": 0.7555555555555555,
      "VeMo (random view)": 0.5776566757493188,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does a swimming motion while standing."
  },
  "012158": {
    "text": "person is walking back and forth",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9134734398961175,
      "Minus Multimodal Distance": -8.170950889587402,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.001445428584702313,
      "MoBERT-F": 0.49312659483551635,
      "MoBERT-N": 0.617383423132938,
      "MoBERT-min(F/N)": 0.49312659483551635,
      "MoBERT-max(F/N)": 0.617383423132938,
      "MotionCritic": -25.187044143676758,
      "VeMo (human-opt view)": 0.8990578734858681,
      "VeMo (max entropy view)": 0.8990578734858681,
      "VeMo (min entropy view)": 0.9433106575963719,
      "VeMo (random view)": 0.8990578734858681,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking back and forth."
  },
  "011332": {
    "text": "walking diagonally to the left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6391149928119072,
      "Minus Multimodal Distance": -9.802225112915039,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.4794815039495006e-05,
      "MoBERT-F": 0.4459182237238471,
      "MoBERT-N": 0.5598558362125998,
      "MoBERT-min(F/N)": 0.4459182237238471,
      "MoBERT-max(F/N)": 0.5598558362125998,
      "MotionCritic": -9.592541694641113,
      "VeMo (human-opt view)": 0.9525787469712703,
      "VeMo (max entropy view)": 0.9152086137281292,
      "VeMo (min entropy view)": 0.9525787469712703,
      "VeMo (random view)": 0.9525787469712703,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking diagonally to the left."
  },
  "002139": {
    "text": "it looks like he is lifting a box on a shelf and grabbing another box to put on shelf as well.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5882121067477983,
      "Minus Multimodal Distance": -13.218942642211914,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5415405616513453e-05,
      "MoBERT-F": 0.34279534333993134,
      "MoBERT-N": 0.3875558297716952,
      "MoBERT-min(F/N)": 0.34279534333993134,
      "MoBERT-max(F/N)": 0.3875558297716952,
      "MotionCritic": -2.655468702316284,
      "VeMo (human-opt view)": 0.0002298314035215872,
      "VeMo (max entropy view)": 0.0003801749785998278,
      "VeMo (min entropy view)": 0.0002298314035215872,
      "VeMo (random view)": 0.0003801749785998278,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "It looks like he is lifting a box onto a shelf and grabbing another box to put on the shelf as well."
  },
  "005274": {
    "text": "a person brings their right arm up and bends their elbow so it is across their stomach.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.18550461742787003,
      "Minus Multimodal Distance": -9.264045715332031,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.212756539345719e-05,
      "MoBERT-F": 0.28777738586956236,
      "MoBERT-N": 0.36376772512326716,
      "MoBERT-min(F/N)": 0.28777738586956236,
      "MoBERT-max(F/N)": 0.36376772512326716,
      "MotionCritic": -8.369661331176758,
      "VeMo (human-opt view)": 0.6218181818181818,
      "VeMo (max entropy view)": 0.6218181818181818,
      "VeMo (min entropy view)": 0.0638648537288834,
      "VeMo (random view)": 0.0638648537288834,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises their right arm and bends their elbow so that it crosses their stomach."
  },
  "014336": {
    "text": "a person raises their hands and brings them closer to their face then lowers them",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2217240087012263,
      "Minus Multimodal Distance": -8.583000183105469,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.307566268835217e-05,
      "MoBERT-F": 0.28793959769844063,
      "MoBERT-N": 0.38883776558488964,
      "MoBERT-min(F/N)": 0.28793959769844063,
      "MoBERT-max(F/N)": 0.38883776558488964,
      "MotionCritic": -7.521018981933594,
      "VeMo (human-opt view)": 0.9988294966835739,
      "VeMo (max entropy view)": 0.964969302997472,
      "VeMo (min entropy view)": 0.9988294966835739,
      "VeMo (random view)": 0.9988294966835739,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises their hands, brings them closer to their face, then lowers them."
  },
  "003142": {
    "text": "a person crawls on hands and knees forward, stands up with side showing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7316826305946479,
      "Minus Multimodal Distance": -9.422389030456543,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.739189196494408e-05,
      "MoBERT-F": 0.45623622813046816,
      "MoBERT-N": 0.5535135696803923,
      "MoBERT-min(F/N)": 0.45623622813046816,
      "MoBERT-max(F/N)": 0.5535135696803923,
      "MotionCritic": -6.180359363555908,
      "VeMo (human-opt view)": 0.964933084872099,
      "VeMo (max entropy view)": 0.964933084872099,
      "VeMo (min entropy view)": 0.9782953943885654,
      "VeMo (random view)": 0.964933084872099,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person crawls forward on hands and knees, then stands up with their side showing."
  },
  "002870": {
    "text": "a person walks from side to side with their arms hanging loosely.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9226640744809627,
      "Minus Multimodal Distance": -6.375584602355957,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.118299355264753e-05,
      "MoBERT-F": 0.40183839605874694,
      "MoBERT-N": 0.47879916948661216,
      "MoBERT-min(F/N)": 0.40183839605874694,
      "MoBERT-max(F/N)": 0.47879916948661216,
      "MotionCritic": -5.535449504852295,
      "VeMo (human-opt view)": 0.2694300518134715,
      "VeMo (max entropy view)": 0.2694300518134715,
      "VeMo (min entropy view)": 0.06007442849548113,
      "VeMo (random view)": 0.06007442849548113,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks from side to side with their arms hanging loosely."
  },
  "004698": {
    "text": "a man walks forwards at medium pace with his arms swawing gently.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.29660241958721245,
      "Minus Multimodal Distance": -4.277622699737549,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.4370048271957785e-05,
      "MoBERT-F": 0.32029558981515927,
      "MoBERT-N": 0.4947024887687457,
      "MoBERT-min(F/N)": 0.32029558981515927,
      "MoBERT-max(F/N)": 0.4947024887687457,
      "MotionCritic": -4.235887050628662,
      "VeMo (human-opt view)": 0.6929460580912863,
      "VeMo (max entropy view)": 0.6929460580912863,
      "VeMo (min entropy view)": 0.7436399217221135,
      "VeMo (random view)": 0.7436399217221135,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward at a medium pace with his arms swinging gently."
  },
  "013208": {
    "text": "ther person walks forward and leans down to pick something up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.697440837108205,
      "Minus Multimodal Distance": -11.923795700073242,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.16196587681770325,
      "MoBERT-F": 0.5821764341423894,
      "MoBERT-N": 0.6035022414110413,
      "MoBERT-min(F/N)": 0.5821764341423894,
      "MoBERT-max(F/N)": 0.6035022414110413,
      "MotionCritic": -6.735879898071289,
      "VeMo (human-opt view)": 0.9840662842574889,
      "VeMo (max entropy view)": 0.964964964964965,
      "VeMo (min entropy view)": 0.9840662842574889,
      "VeMo (random view)": 0.9840662842574889,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walks forward and leans down to pick something up."
  },
  "000429": {
    "text": "a person walks forward, and repeatedly reaches down then shakes something",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.46566626919464277,
      "Minus Multimodal Distance": -3.166848659515381,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.024723920971155167,
      "MoBERT-F": 0.4739347092290657,
      "MoBERT-N": 0.5573346837431538,
      "MoBERT-min(F/N)": 0.4739347092290657,
      "MoBERT-max(F/N)": 0.5573346837431538,
      "MotionCritic": -5.386074542999268,
      "VeMo (human-opt view)": 0.8267898383371824,
      "VeMo (max entropy view)": 0.2689075630252101,
      "VeMo (min entropy view)": 0.8267898383371824,
      "VeMo (random view)": 0.8267898383371824,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and repeatedly reaches down and then shakes something."
  },
  "001406": {
    "text": "a person walk forward cautiously with their hand against a surface.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3582507523124744,
      "Minus Multimodal Distance": -9.870694160461426,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.818837649305351e-05,
      "MoBERT-F": 0.28582583631791825,
      "MoBERT-N": 0.43015090883002466,
      "MoBERT-min(F/N)": 0.28582583631791825,
      "MoBERT-max(F/N)": 0.43015090883002466,
      "MotionCritic": -4.04994010925293,
      "VeMo (human-opt view)": 0.0016982200138373482,
      "VeMo (max entropy view)": 0.0016982200138373482,
      "VeMo (min entropy view)": 0.0006689330975267802,
      "VeMo (random view)": 0.0006689330975267802,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward cautiously with their hand against a surface."
  },
  "014244": {
    "text": "someone scrolls from right to left and then stands",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.073990066068,
      "Minus Multimodal Distance": -2.491655111312866,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.01784251071512699,
      "MoBERT-F": 0.653410839285437,
      "MoBERT-N": 0.6571950029346204,
      "MoBERT-min(F/N)": 0.653410839285437,
      "MoBERT-max(F/N)": 0.6571950029346204,
      "MotionCritic": -10.071002960205078,
      "VeMo (human-opt view)": 0.7982062780269058,
      "VeMo (max entropy view)": 0.7553444180522565,
      "VeMo (min entropy view)": 0.7982062780269058,
      "VeMo (random view)": 0.7553444180522565,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone scrolls from right to left and then stands."
  },
  "009349": {
    "text": "a man crawls forward on his stomach.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7748934374989026,
      "Minus Multimodal Distance": -8.433030128479004,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0035155515652149916,
      "MoBERT-F": 0.6272465285500639,
      "MoBERT-N": 0.6603003389357023,
      "MoBERT-min(F/N)": 0.6272465285500639,
      "MoBERT-max(F/N)": 0.6603003389357023,
      "MotionCritic": -5.078800678253174,
      "VeMo (human-opt view)": 0.766295707472178,
      "VeMo (max entropy view)": 0.562015503875969,
      "VeMo (min entropy view)": 0.766295707472178,
      "VeMo (random view)": 0.766295707472178,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man crawls forward on his stomach."
  },
  "008443": {
    "text": "the man is moving his arms",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.29130547446163196,
      "Minus Multimodal Distance": -2.8907687664031982,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.374396667117253e-05,
      "MoBERT-F": 0.367697377699127,
      "MoBERT-N": 0.4353433920307231,
      "MoBERT-min(F/N)": 0.367697377699127,
      "MoBERT-max(F/N)": 0.4353433920307231,
      "MotionCritic": -12.079517364501953,
      "VeMo (human-opt view)": 0.9896500739280434,
      "VeMo (max entropy view)": 0.9896500739280434,
      "VeMo (min entropy view)": 0.9933087856831769,
      "VeMo (random view)": 0.9896500739280434,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man is moving his arms."
  },
  "002733": {
    "text": "cheerfully walking forward with each step.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.49655506660665766,
      "Minus Multimodal Distance": -6.301775932312012,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.579627809813246e-05,
      "MoBERT-F": 0.34909177833788,
      "MoBERT-N": 0.4657565882215386,
      "MoBERT-min(F/N)": 0.34909177833788,
      "MoBERT-max(F/N)": 0.4657565882215386,
      "MotionCritic": -4.7762370109558105,
      "VeMo (human-opt view)": 0.851664984863774,
      "VeMo (max entropy view)": 0.851664984863774,
      "VeMo (min entropy view)": 0.8522483940042827,
      "VeMo (random view)": 0.851664984863774,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Cheerfully walking forward with each step."
  },
  "005486": {
    "text": "the man throws  both hands",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9645897524414104,
      "Minus Multimodal Distance": -10.197433471679688,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.012732108123600483,
      "MoBERT-F": 0.6053761296882991,
      "MoBERT-N": 0.6174756780890079,
      "MoBERT-min(F/N)": 0.6053761296882991,
      "MoBERT-max(F/N)": 0.6174756780890079,
      "MotionCritic": -8.538055419921875,
      "VeMo (human-opt view)": 0.9809264305177112,
      "VeMo (max entropy view)": 0.9649805447470817,
      "VeMo (min entropy view)": 0.9809264305177112,
      "VeMo (random view)": 0.9809264305177112,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man throws both hands."
  },
  "008068": {
    "text": "the person is sitting down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1018406131128986,
      "Minus Multimodal Distance": -7.802614212036133,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7026761174201965,
      "MoBERT-F": 0.5613727331479275,
      "MoBERT-N": 0.5297566460247576,
      "MoBERT-min(F/N)": 0.5297566460247576,
      "MoBERT-max(F/N)": 0.5613727331479275,
      "MotionCritic": -5.407296657562256,
      "VeMo (human-opt view)": 0.39215686274509803,
      "VeMo (max entropy view)": 0.39215686274509803,
      "VeMo (min entropy view)": 0.6518771331058021,
      "VeMo (random view)": 0.39215686274509803,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is sitting down."
  },
  "004398": {
    "text": "a man takes sideways steps to his right, then immediately takes sideways steps to his left back towards his original position.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.35234006708379606,
      "Minus Multimodal Distance": -4.673681735992432,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.002266567200422287,
      "MoBERT-F": 0.48688198681527384,
      "MoBERT-N": 0.5811715475990492,
      "MoBERT-min(F/N)": 0.48688198681527384,
      "MoBERT-max(F/N)": 0.5811715475990492,
      "MotionCritic": -3.8095898628234863,
      "VeMo (human-opt view)": 0.974080771549126,
      "VeMo (max entropy view)": 0.9707291112293773,
      "VeMo (min entropy view)": 0.974080771549126,
      "VeMo (random view)": 0.974080771549126,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man takes sideways steps to his right, then immediately takes sideways steps to his left, back towards his original position."
  },
  "009714": {
    "text": "a person makes a \"fast break\" motion, zig-zagging briefly from left to right then darting forward to the right.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.231070659346423,
      "Minus Multimodal Distance": -2.053297281265259,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.2886545658111572,
      "MoBERT-F": 0.5730421226127328,
      "MoBERT-N": 0.6020188033335132,
      "MoBERT-min(F/N)": 0.5730421226127328,
      "MoBERT-max(F/N)": 0.6020188033335132,
      "MotionCritic": -5.758599758148193,
      "VeMo (human-opt view)": 0.5615942028985508,
      "VeMo (max entropy view)": 0.5615942028985508,
      "VeMo (min entropy view)": 0.6085626911314985,
      "VeMo (random view)": 0.5615942028985508,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person makes a \"fast break\" motion, zig-zagging briefly from left to right, then darting forward to the right."
  },
  "002954": {
    "text": "walks in a tight circle the runs back and walks in a tight circle.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8587160428121821,
      "Minus Multimodal Distance": -7.585158824920654,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.679042700037826e-05,
      "MoBERT-F": 0.41345559474278065,
      "MoBERT-N": 0.5623488188510744,
      "MoBERT-min(F/N)": 0.41345559474278065,
      "MoBERT-max(F/N)": 0.5623488188510744,
      "MotionCritic": -14.437447547912598,
      "VeMo (human-opt view)": 0.7053140096618358,
      "VeMo (max entropy view)": 0.6933333333333334,
      "VeMo (min entropy view)": 0.7053140096618358,
      "VeMo (random view)": 0.7053140096618358,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in a tight circle, then runs back and walks in a tight circle."
  },
  "000337": {
    "text": "a person walks down stairs while holding a railing with his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8390501215212316,
      "Minus Multimodal Distance": -9.985511779785156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.7590969693847e-05,
      "MoBERT-F": 0.372928538495953,
      "MoBERT-N": 0.4515009850989548,
      "MoBERT-min(F/N)": 0.372928538495953,
      "MoBERT-max(F/N)": 0.4515009850989548,
      "MotionCritic": -3.2400429248809814,
      "VeMo (human-opt view)": 1.8333867775435906e-05,
      "VeMo (max entropy view)": 4.153257217297405e-05,
      "VeMo (min entropy view)": 1.8333867775435906e-05,
      "VeMo (random view)": 1.8333867775435906e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks down the stairs while holding the railing with his right hand."
  },
  "006741": {
    "text": "a person does jumping jacks, stumbles and then backs up to continue.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7544971958889951,
      "Minus Multimodal Distance": -10.896777153015137,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9018863439559937,
      "MoBERT-F": 0.7271451850260041,
      "MoBERT-N": 0.6525962900484672,
      "MoBERT-min(F/N)": 0.6525962900484672,
      "MoBERT-max(F/N)": 0.7271451850260041,
      "MotionCritic": -3.976935625076294,
      "VeMo (human-opt view)": 0.7974137931034483,
      "VeMo (max entropy view)": 0.7974137931034483,
      "VeMo (min entropy view)": 0.8736717827626919,
      "VeMo (random view)": 0.8736717827626919,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does jumping jacks, stumbles, and then backs up to continue."
  },
  "004881": {
    "text": "character lowers left hand then waves right hand in the air.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5228604482192603,
      "Minus Multimodal Distance": -10.861838340759277,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.376423046574928e-05,
      "MoBERT-F": 0.2772102526508462,
      "MoBERT-N": 0.36532487926985274,
      "MoBERT-min(F/N)": 0.2772102526508462,
      "MoBERT-max(F/N)": 0.36532487926985274,
      "MotionCritic": -9.762916564941406,
      "VeMo (human-opt view)": 0.8932676518883416,
      "VeMo (max entropy view)": 0.8932676518883416,
      "VeMo (min entropy view)": 0.9100443727309399,
      "VeMo (random view)": 0.9100443727309399,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The character lowers their left hand and then waves their right hand in the air."
  },
  "013316": {
    "text": "move the both the hand and after hold the legs.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6051435305912829,
      "Minus Multimodal Distance": -5.633236885070801,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.8811321650864556e-05,
      "MoBERT-F": 0.2710668441281742,
      "MoBERT-N": 0.37480357149071064,
      "MoBERT-min(F/N)": 0.2710668441281742,
      "MoBERT-max(F/N)": 0.37480357149071064,
      "MotionCritic": -1.6570535898208618,
      "VeMo (human-opt view)": 0.1921182266009852,
      "VeMo (max entropy view)": 0.1921182266009852,
      "VeMo (min entropy view)": 0.05021116846550915,
      "VeMo (random view)": 0.05021116846550915,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person moves both hands and then holds the legs."
  },
  "000612": {
    "text": "the person is around and i think holding someone dancing.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1944459890682373,
      "Minus Multimodal Distance": -5.698245525360107,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3852797312429175e-05,
      "MoBERT-F": 0.43723320825748296,
      "MoBERT-N": 0.4338888615910618,
      "MoBERT-min(F/N)": 0.4338888615910618,
      "MoBERT-max(F/N)": 0.43723320825748296,
      "MotionCritic": -7.769947528839111,
      "VeMo (human-opt view)": 0.09007417873542918,
      "VeMo (max entropy view)": 0.09007417873542918,
      "VeMo (min entropy view)": 0.085,
      "VeMo (random view)": 0.085,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is around, and I think they're holding someone and dancing."
  },
  "012602": {
    "text": "a person is repeatedly raising and lowering their forearms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4417134607072778,
      "Minus Multimodal Distance": -7.9087347984313965,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.0726815718226135e-05,
      "MoBERT-F": 0.3046747756212335,
      "MoBERT-N": 0.40637500086786804,
      "MoBERT-min(F/N)": 0.3046747756212335,
      "MoBERT-max(F/N)": 0.40637500086786804,
      "MotionCritic": -7.540743350982666,
      "VeMo (human-opt view)": 0.9466089466089466,
      "VeMo (max entropy view)": 0.9466089466089466,
      "VeMo (min entropy view)": 0.9578713968957872,
      "VeMo (random view)": 0.9578713968957872,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is repeatedly raising and lowering their forearms."
  },
  "002383": {
    "text": "person walks forward slowly and hesitates before standing upright",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2088260892842898,
      "Minus Multimodal Distance": -8.972319602966309,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 9.108849189942703e-05,
      "MoBERT-F": 0.2968124742292504,
      "MoBERT-N": 0.42021367735227577,
      "MoBERT-min(F/N)": 0.2968124742292504,
      "MoBERT-max(F/N)": 0.42021367735227577,
      "MotionCritic": -6.422778129577637,
      "VeMo (human-opt view)": 0.9875370919881306,
      "VeMo (max entropy view)": 0.9525787469712703,
      "VeMo (min entropy view)": 0.9875370919881306,
      "VeMo (random view)": 0.9875370919881306,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person walks forward slowly and hesitates before standing upright."
  },
  "006473": {
    "text": "the sim is walking in a circle motion.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9119411173162869,
      "Minus Multimodal Distance": -6.102593898773193,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.09724504500627518,
      "MoBERT-F": 0.6315486994473873,
      "MoBERT-N": 0.5852319972603681,
      "MoBERT-min(F/N)": 0.5852319972603681,
      "MoBERT-max(F/N)": 0.6315486994473873,
      "MotionCritic": -10.567790031433105,
      "VeMo (human-opt view)": 0.6931106471816284,
      "VeMo (max entropy view)": 0.6931106471816284,
      "VeMo (min entropy view)": 0.7318718381112985,
      "VeMo (random view)": 0.6931106471816284,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking in a circular motion."
  },
  "010816": {
    "text": "person is walking while kicking out legs",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5143486754778652,
      "Minus Multimodal Distance": -10.003291130065918,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.906831261701882e-05,
      "MoBERT-F": 0.4613472915839699,
      "MoBERT-N": 0.630039341044514,
      "MoBERT-min(F/N)": 0.4613472915839699,
      "MoBERT-max(F/N)": 0.630039341044514,
      "MotionCritic": -6.828458309173584,
      "VeMo (human-opt view)": 0.8519968676585747,
      "VeMo (max entropy view)": 0.8350983358547656,
      "VeMo (min entropy view)": 0.8519968676585747,
      "VeMo (random view)": 0.8350983358547656,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking while kicking their legs out."
  },
  "014222": {
    "text": "a blind folded person walks around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1092685276239,
      "Minus Multimodal Distance": -4.4420390129089355,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.07404807209968567,
      "MoBERT-F": 0.614972324784699,
      "MoBERT-N": 0.5930810376728882,
      "MoBERT-min(F/N)": 0.5930810376728882,
      "MoBERT-max(F/N)": 0.614972324784699,
      "MotionCritic": -18.36484718322754,
      "VeMo (human-opt view)": 0.6226086956521739,
      "VeMo (max entropy view)": 0.6226086956521739,
      "VeMo (min entropy view)": 0.6509433962264151,
      "VeMo (random view)": 0.6226086956521739,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A blindfolded person walks around."
  },
  "006251": {
    "text": "the person is walking normally.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.25881594963392857,
      "Minus Multimodal Distance": -8.518280029296875,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.768082726513967e-05,
      "MoBERT-F": 0.5388161814815834,
      "MoBERT-N": 0.6114041539753747,
      "MoBERT-min(F/N)": 0.5388161814815834,
      "MoBERT-max(F/N)": 0.6114041539753747,
      "MotionCritic": -2.766221046447754,
      "VeMo (human-opt view)": 0.9577464788732394,
      "VeMo (max entropy view)": 0.9577464788732394,
      "VeMo (min entropy view)": 0.9580124223602484,
      "VeMo (random view)": 0.9580124223602484,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking normally."
  },
  "002323": {
    "text": "a person standing up throws something forward from above their head, then throws something again forward from above their head with more force which makes them take one step forward with their right foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.58012587850782,
      "Minus Multimodal Distance": -9.029206275939941,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.02561311237514019,
      "MoBERT-F": 0.6404614472041501,
      "MoBERT-N": 0.5678306533767573,
      "MoBERT-min(F/N)": 0.5678306533767573,
      "MoBERT-max(F/N)": 0.6404614472041501,
      "MotionCritic": -4.533114433288574,
      "VeMo (human-opt view)": 0.5617529880478087,
      "VeMo (max entropy view)": 0.5617529880478087,
      "VeMo (min entropy view)": 0.7767857142857143,
      "VeMo (random view)": 0.5617529880478087,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person standing up throws something forward from above their head. Then, they throw something forward again from above their head with more force, causing them to take one step forward with their right foot."
  },
  "014020": {
    "text": "a person is standing up straight, with their arms stretched out. they then bring both hands toward their face simultainiously.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3960895902139334,
      "Minus Multimodal Distance": -3.049370527267456,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.95178867620416e-05,
      "MoBERT-F": 0.3748363516549888,
      "MoBERT-N": 0.39169627461844986,
      "MoBERT-min(F/N)": 0.3748363516549888,
      "MoBERT-max(F/N)": 0.39169627461844986,
      "MotionCritic": -8.56355094909668,
      "VeMo (human-opt view)": 0.9706907153105144,
      "VeMo (max entropy view)": 0.9496290511518938,
      "VeMo (min entropy view)": 0.9706907153105144,
      "VeMo (random view)": 0.9496290511518938,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is standing up straight, with their arms stretched out. They then bring both hands toward their face simultaneously."
  },
  "012555": {
    "text": "a person kicks their right foot in front of them in a dangling motion and then puts it back on the ground.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4355517107776837,
      "Minus Multimodal Distance": -10.058680534362793,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9943364262580872,
      "MoBERT-F": 0.8158429598359214,
      "MoBERT-N": 0.7058949950804114,
      "MoBERT-min(F/N)": 0.7058949950804114,
      "MoBERT-max(F/N)": 0.8158429598359214,
      "MotionCritic": -5.757161617279053,
      "VeMo (human-opt view)": 0.9902897292176521,
      "VeMo (max entropy view)": 0.9902897292176521,
      "VeMo (min entropy view)": 0.9908716663683551,
      "VeMo (random view)": 0.9908716663683551,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person kicks their right foot forward in a dangling motion and then places it back on the ground."
  },
  "003696": {
    "text": "a person walks forward quickly and leaves a trail behind them as if they are dripping paint",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.42584217290878934,
      "Minus Multimodal Distance": -3.131798267364502,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.35926592350006104,
      "MoBERT-F": 0.6124700119012154,
      "MoBERT-N": 0.5795453033686746,
      "MoBERT-min(F/N)": 0.5795453033686746,
      "MoBERT-max(F/N)": 0.6124700119012154,
      "MotionCritic": 2.947105884552002,
      "VeMo (human-opt view)": 0.00626894175281272,
      "VeMo (max entropy view)": 0.029226785053644098,
      "VeMo (min entropy view)": 0.00626894175281272,
      "VeMo (random view)": 0.00626894175281272,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward quickly and leaves a trail behind them, as if they are dripping paint."
  },
  "008824": {
    "text": "the person waddles low to the ground and then stands up and walks back.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2254647078927792,
      "Minus Multimodal Distance": -6.634729862213135,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.7186815738677979,
      "MoBERT-F": 0.584756300362269,
      "MoBERT-N": 0.4869247821987073,
      "MoBERT-min(F/N)": 0.4869247821987073,
      "MoBERT-max(F/N)": 0.584756300362269,
      "MotionCritic": -4.993997573852539,
      "VeMo (human-opt view)": 0.9580531540185719,
      "VeMo (max entropy view)": 0.9526488513830286,
      "VeMo (min entropy view)": 0.9580531540185719,
      "VeMo (random view)": 0.9580531540185719,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person waddles close to the ground, then stands up and walks back."
  },
  "006784": {
    "text": "a person picks a drink up with their right hand and finishes it.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2730326081784383,
      "Minus Multimodal Distance": -8.316899299621582,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 7.841896876925603e-05,
      "MoBERT-F": 0.25934454103734356,
      "MoBERT-N": 0.3581674393981085,
      "MoBERT-min(F/N)": 0.25934454103734356,
      "MoBERT-max(F/N)": 0.3581674393981085,
      "MotionCritic": -9.603263854980469,
      "VeMo (human-opt view)": 2.2801940119362157e-05,
      "VeMo (max entropy view)": 2.2801940119362157e-05,
      "VeMo (min entropy view)": 1.7230133463242254e-05,
      "VeMo (random view)": 1.7230133463242254e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person picks up a drink with their right hand and finishes it."
  },
  "004556": {
    "text": "someone walks forward and to the right, then stands looking straight ahead.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.14740325912858876,
      "Minus Multimodal Distance": -8.628756523132324,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.106829485157505e-05,
      "MoBERT-F": 0.327040496862935,
      "MoBERT-N": 0.46643852293607846,
      "MoBERT-min(F/N)": 0.327040496862935,
      "MoBERT-max(F/N)": 0.46643852293607846,
      "MotionCritic": -1.112289309501648,
      "VeMo (human-opt view)": 0.8170212765957446,
      "VeMo (max entropy view)": 0.8170212765957446,
      "VeMo (min entropy view)": 0.9706019598693421,
      "VeMo (random view)": 0.9706019598693421,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone walks forward and to the right, then stands looking straight ahead."
  },
  "001209": {
    "text": "the person was moving his arms around like he was drunk.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8999951678636691,
      "Minus Multimodal Distance": -3.1715149879455566,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00018005854508373886,
      "MoBERT-F": 0.5822308737125335,
      "MoBERT-N": 0.5729819947544932,
      "MoBERT-min(F/N)": 0.5729819947544932,
      "MoBERT-max(F/N)": 0.5822308737125335,
      "MotionCritic": -3.9830808639526367,
      "VeMo (human-opt view)": 0.8805528134254689,
      "VeMo (max entropy view)": 0.8805528134254689,
      "VeMo (min entropy view)": 0.8870116156282999,
      "VeMo (random view)": 0.8805528134254689,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was moving his arms around as if he was drunk."
  },
  "014043": {
    "text": "a person sits down on something and then stands back up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7507121283009041,
      "Minus Multimodal Distance": -9.437163352966309,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9867173433303833,
      "MoBERT-F": 0.6948628402767112,
      "MoBERT-N": 0.5434486503485594,
      "MoBERT-min(F/N)": 0.5434486503485594,
      "MoBERT-max(F/N)": 0.6948628402767112,
      "MotionCritic": -7.861225605010986,
      "VeMo (human-opt view)": 0.9868071013627233,
      "VeMo (max entropy view)": 0.9868071013627233,
      "VeMo (min entropy view)": 0.9933162876239178,
      "VeMo (random view)": 0.9868071013627233,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits down on something and then stands back up."
  },
  "001349": {
    "text": "a man opens something and rubs it under his arms.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3271253806152285,
      "Minus Multimodal Distance": -2.60103702545166,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.3672393328743055e-05,
      "MoBERT-F": 0.2888960665609463,
      "MoBERT-N": 0.41361220182126857,
      "MoBERT-min(F/N)": 0.2888960665609463,
      "MoBERT-max(F/N)": 0.41361220182126857,
      "MotionCritic": -9.419661521911621,
      "VeMo (human-opt view)": 0.40774487471526194,
      "VeMo (max entropy view)": 0.40774487471526194,
      "VeMo (min entropy view)": 0.002046584153591268,
      "VeMo (random view)": 0.40774487471526194,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man opens something and rubs it under his arms."
  },
  "011645": {
    "text": "a person plays the guitar, strumming with their left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.18287579257240016,
      "Minus Multimodal Distance": -8.770366668701172,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.049374183523469e-05,
      "MoBERT-F": 0.2890040729114556,
      "MoBERT-N": 0.3726306216901429,
      "MoBERT-min(F/N)": 0.2890040729114556,
      "MoBERT-max(F/N)": 0.3726306216901429,
      "MotionCritic": -4.838718891143799,
      "VeMo (human-opt view)": 6.357788354311916e-06,
      "VeMo (max entropy view)": 8.649811973497365e-06,
      "VeMo (min entropy view)": 6.357788354311916e-06,
      "VeMo (random view)": 6.357788354311916e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is playing the guitar, strumming with their left hand."
  },
  "009220": {
    "text": "a hunched individual slowly wobbles forward in a drunken manner.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5555977824750213,
      "Minus Multimodal Distance": -3.380112648010254,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.520911584724672e-05,
      "MoBERT-F": 0.34525828995773683,
      "MoBERT-N": 0.5063446626911969,
      "MoBERT-min(F/N)": 0.34525828995773683,
      "MoBERT-max(F/N)": 0.5063446626911969,
      "MotionCritic": -14.571422576904297,
      "VeMo (human-opt view)": 0.9326705829191143,
      "VeMo (max entropy view)": 0.9326705829191143,
      "VeMo (min entropy view)": 0.9688699360341151,
      "VeMo (random view)": 0.9326705829191143,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A hunched individual slowly wobbles forward in a drunken manner."
  },
  "012355": {
    "text": "a person is putting both hands to his face.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2410603997552316,
      "Minus Multimodal Distance": -6.397944927215576,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.328558861743659e-05,
      "MoBERT-F": 0.31375003423061576,
      "MoBERT-N": 0.37912065748191537,
      "MoBERT-min(F/N)": 0.31375003423061576,
      "MoBERT-max(F/N)": 0.37912065748191537,
      "MotionCritic": -8.133642196655273,
      "VeMo (human-opt view)": 0.9978226391763663,
      "VeMo (max entropy view)": 0.39211136890951276,
      "VeMo (min entropy view)": 0.9978226391763663,
      "VeMo (random view)": 0.39211136890951276,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is putting both hands to his face."
  },
  "003191": {
    "text": "a person lobs tennis balls with their right arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7443442221374752,
      "Minus Multimodal Distance": -5.845700263977051,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4499888240825385e-05,
      "MoBERT-F": 0.44301379143509645,
      "MoBERT-N": 0.4529009610567317,
      "MoBERT-min(F/N)": 0.44301379143509645,
      "MoBERT-max(F/N)": 0.4529009610567317,
      "MotionCritic": -3.0637288093566895,
      "VeMo (human-opt view)": 0.06776859504132231,
      "VeMo (max entropy view)": 0.06776859504132231,
      "VeMo (min entropy view)": 0.0260462393912789,
      "VeMo (random view)": 0.06776859504132231,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lobs tennis balls with their right arm."
  },
  "007889": {
    "text": "the body slides to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.36078117614518446,
      "Minus Multimodal Distance": -4.147586822509766,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6569730835035443e-05,
      "MoBERT-F": 0.3187746895641582,
      "MoBERT-N": 0.4401210370347162,
      "MoBERT-min(F/N)": 0.3187746895641582,
      "MoBERT-max(F/N)": 0.4401210370347162,
      "MotionCritic": -6.993322849273682,
      "VeMo (human-opt view)": 0.826992103374013,
      "VeMo (max entropy view)": 0.826992103374013,
      "VeMo (min entropy view)": 0.8353188507358094,
      "VeMo (random view)": 0.8353188507358094,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The body slides to the left."
  },
  "000296": {
    "text": "someone working out the right arm",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7782748162898473,
      "Minus Multimodal Distance": -9.934121131896973,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.7356323446147144e-05,
      "MoBERT-F": 0.3699095602424886,
      "MoBERT-N": 0.42609577828981304,
      "MoBERT-min(F/N)": 0.3699095602424886,
      "MoBERT-max(F/N)": 0.42609577828981304,
      "MotionCritic": -7.017866611480713,
      "VeMo (human-opt view)": 0.8933500627352572,
      "VeMo (max entropy view)": 0.8178438661710037,
      "VeMo (min entropy view)": 0.8933500627352572,
      "VeMo (random view)": 0.8178438661710037,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone is working out the right arm."
  },
  "012049": {
    "text": "a person jumps sideways to their right several times, then several times to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.813532686517436,
      "Minus Multimodal Distance": -10.111536026000977,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9947600960731506,
      "MoBERT-F": 0.7246512510966977,
      "MoBERT-N": 0.7290413935207174,
      "MoBERT-min(F/N)": 0.7246512510966977,
      "MoBERT-max(F/N)": 0.7290413935207174,
      "MotionCritic": -8.290246963500977,
      "VeMo (human-opt view)": 0.546448087431694,
      "VeMo (max entropy view)": 0.546448087431694,
      "VeMo (min entropy view)": 0.6081081081081081,
      "VeMo (random view)": 0.6081081081081081,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps sideways to their right several times, then several times to the left."
  },
  "009773": {
    "text": "a man swings his arms to pick up a bucket and pour it out.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5833560332176124,
      "Minus Multimodal Distance": -7.193583011627197,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.068785635055974e-05,
      "MoBERT-F": 0.2788647264766132,
      "MoBERT-N": 0.4126736453045459,
      "MoBERT-min(F/N)": 0.2788647264766132,
      "MoBERT-max(F/N)": 0.4126736453045459,
      "MotionCritic": -5.132586479187012,
      "VeMo (human-opt view)": 0.003830248279440265,
      "VeMo (max entropy view)": 0.010969299758537427,
      "VeMo (min entropy view)": 0.003830248279440265,
      "VeMo (random view)": 0.003830248279440265,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man swings his arms to pick up a bucket and pour its contents out."
  },
  "012568": {
    "text": "the person does a couple of small kicks with his left leg",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5350109716565321,
      "Minus Multimodal Distance": -10.024785041809082,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9676153659820557,
      "MoBERT-F": 0.8104181764159657,
      "MoBERT-N": 0.7897745094866446,
      "MoBERT-min(F/N)": 0.7897745094866446,
      "MoBERT-max(F/N)": 0.8104181764159657,
      "MotionCritic": -3.631847858428955,
      "VeMo (human-opt view)": 0.9466872110939908,
      "VeMo (max entropy view)": 0.9283135636926251,
      "VeMo (min entropy view)": 0.9466872110939908,
      "VeMo (random view)": 0.9466872110939908,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person does a couple of small kicks with his left leg."
  },
  "011385": {
    "text": "the person is picking something up and putting it on something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5828306975515405,
      "Minus Multimodal Distance": -10.940699577331543,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.534468330850359e-05,
      "MoBERT-F": 0.36420144369809004,
      "MoBERT-N": 0.4328174259601608,
      "MoBERT-min(F/N)": 0.36420144369809004,
      "MoBERT-max(F/N)": 0.4328174259601608,
      "MotionCritic": -6.944300651550293,
      "VeMo (human-opt view)": 0.06752591762398431,
      "VeMo (max entropy view)": 0.06752591762398431,
      "VeMo (min entropy view)": 0.02158273381294964,
      "VeMo (random view)": 0.06752591762398431,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is picking something up and putting it on something."
  },
  "004968": {
    "text": "a person throws and object and then catches another object",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6113634604047462,
      "Minus Multimodal Distance": -3.357125997543335,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9690096198464744e-05,
      "MoBERT-F": 0.3864876204075183,
      "MoBERT-N": 0.522583077560968,
      "MoBERT-min(F/N)": 0.3864876204075183,
      "MoBERT-max(F/N)": 0.522583077560968,
      "MotionCritic": -5.079990863800049,
      "VeMo (human-opt view)": 0.00021586006954015164,
      "VeMo (max entropy view)": 0.07589285714285714,
      "VeMo (min entropy view)": 0.00021586006954015164,
      "VeMo (random view)": 0.00021586006954015164,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person throws an object and then catches another object."
  },
  "013543": {
    "text": "a person made a micro move to bent the knee",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9423976979897746,
      "Minus Multimodal Distance": -11.288532257080078,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 8.884721319191158e-05,
      "MoBERT-F": 0.47948376065810927,
      "MoBERT-N": 0.512225131261448,
      "MoBERT-min(F/N)": 0.47948376065810927,
      "MoBERT-max(F/N)": 0.512225131261448,
      "MotionCritic": -6.267061710357666,
      "VeMo (human-opt view)": 0.9579367516119128,
      "VeMo (max entropy view)": 0.9400570884871551,
      "VeMo (min entropy view)": 0.9579367516119128,
      "VeMo (random view)": 0.9579367516119128,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person made a micromove to bend the knee."
  },
  "000601": {
    "text": "person is sitting down scratching head i think.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0515352174479695,
      "Minus Multimodal Distance": -4.466688632965088,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.085953358095139e-05,
      "MoBERT-F": 0.32461302739708764,
      "MoBERT-N": 0.4240861187953851,
      "MoBERT-min(F/N)": 0.32461302739708764,
      "MoBERT-max(F/N)": 0.4240861187953851,
      "MotionCritic": -3.5882487297058105,
      "VeMo (human-opt view)": 1.7749704872231708e-05,
      "VeMo (max entropy view)": 2.7575364390056838e-05,
      "VeMo (min entropy view)": 1.7749704872231708e-05,
      "VeMo (random view)": 2.7575364390056838e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting down, scratching their head."
  },
  "009216": {
    "text": "a person who seems to put slippers on",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5707840076273208,
      "Minus Multimodal Distance": -7.450031280517578,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.9923910410143435e-05,
      "MoBERT-F": 0.3139121870358945,
      "MoBERT-N": 0.4042408678755525,
      "MoBERT-min(F/N)": 0.3139121870358945,
      "MoBERT-max(F/N)": 0.4042408678755525,
      "MotionCritic": -7.799022674560547,
      "VeMo (human-opt view)": 0.00010884066038293254,
      "VeMo (max entropy view)": 0.0003803936845482252,
      "VeMo (min entropy view)": 0.00010884066038293254,
      "VeMo (random view)": 0.00010884066038293254,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person who seems to be putting on slippers."
  },
  "001538": {
    "text": "a person walks up and tosses something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7115395450822859,
      "Minus Multimodal Distance": -5.064725875854492,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5018050074577332,
      "MoBERT-F": 0.5656720664861277,
      "MoBERT-N": 0.620371840462917,
      "MoBERT-min(F/N)": 0.5656720664861277,
      "MoBERT-max(F/N)": 0.620371840462917,
      "MotionCritic": -5.831942081451416,
      "VeMo (human-opt view)": 0.8079350766456267,
      "VeMo (max entropy view)": 0.8079350766456267,
      "VeMo (min entropy view)": 0.8522483940042827,
      "VeMo (random view)": 0.8522483940042827,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks up and tosses something."
  },
  "007655": {
    "text": "figure appears to be fighting or dancing",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0424637510211199,
      "Minus Multimodal Distance": -9.060397148132324,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.4131893515586853,
      "MoBERT-F": 0.6954187201215565,
      "MoBERT-N": 0.7355803240288854,
      "MoBERT-min(F/N)": 0.6954187201215565,
      "MoBERT-max(F/N)": 0.7355803240288854,
      "MotionCritic": -7.333259105682373,
      "VeMo (human-opt view)": 0.982010582010582,
      "VeMo (max entropy view)": 0.974020938348197,
      "VeMo (min entropy view)": 0.982010582010582,
      "VeMo (random view)": 0.974020938348197,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure appears to be either fighting or dancing."
  },
  "013833": {
    "text": "the person crouches and walks forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7703025291629131,
      "Minus Multimodal Distance": -10.465590476989746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9105338454246521,
      "MoBERT-F": 0.6251242062896781,
      "MoBERT-N": 0.5055274494426943,
      "MoBERT-min(F/N)": 0.5055274494426943,
      "MoBERT-max(F/N)": 0.6251242062896781,
      "MotionCritic": -2.12168550491333,
      "VeMo (human-opt view)": 0.9940828402366864,
      "VeMo (max entropy view)": 0.9896907216494846,
      "VeMo (min entropy view)": 0.9940828402366864,
      "VeMo (random view)": 0.9896907216494846,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person crouches and walks forward."
  },
  "006258": {
    "text": "the man moves to the side",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.47883862564095225,
      "Minus Multimodal Distance": -6.551358699798584,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4584331185906194e-05,
      "MoBERT-F": 0.407943766562727,
      "MoBERT-N": 0.48225102633487,
      "MoBERT-min(F/N)": 0.407943766562727,
      "MoBERT-max(F/N)": 0.48225102633487,
      "MotionCritic": -7.055219650268555,
      "VeMo (human-opt view)": 0.9687866697958226,
      "VeMo (max entropy view)": 0.9553956834532374,
      "VeMo (min entropy view)": 0.9687866697958226,
      "VeMo (random view)": 0.9553956834532374,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man moves to the side."
  },
  "001285": {
    "text": "someone doing the chicken dance",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7138965409003417,
      "Minus Multimodal Distance": -2.5714635848999023,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.850673809007276e-05,
      "MoBERT-F": 0.32580452193767095,
      "MoBERT-N": 0.4005233600774144,
      "MoBERT-min(F/N)": 0.32580452193767095,
      "MoBERT-max(F/N)": 0.4005233600774144,
      "MotionCritic": -8.697103500366211,
      "VeMo (human-opt view)": 3.437124867534171e-05,
      "VeMo (max entropy view)": 0.0015978008762133837,
      "VeMo (min entropy view)": 3.437124867534171e-05,
      "VeMo (random view)": 3.437124867534171e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone is doing the chicken dance."
  },
  "001520": {
    "text": "someone kick twice doing karate",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7923279534679275,
      "Minus Multimodal Distance": -5.577510356903076,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9927103519439697,
      "MoBERT-F": 0.8303304972051718,
      "MoBERT-N": 0.7652916192221468,
      "MoBERT-min(F/N)": 0.7652916192221468,
      "MoBERT-max(F/N)": 0.8303304972051718,
      "MotionCritic": -5.641969203948975,
      "VeMo (human-opt view)": 0.9882607910420805,
      "VeMo (max entropy view)": 0.98302985259507,
      "VeMo (min entropy view)": 0.9882607910420805,
      "VeMo (random view)": 0.9882607910420805,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone kicks twice while doing karate."
  },
  "007625": {
    "text": "a person who is prone pushes himself up off the ground using his arms and propping himself with his knees before standing awkwardly in a ready to wrestle position.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5182638793369403,
      "Minus Multimodal Distance": -8.098031997680664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0016716899117454886,
      "MoBERT-F": 0.5071624964685602,
      "MoBERT-N": 0.3961186048933958,
      "MoBERT-min(F/N)": 0.3961186048933958,
      "MoBERT-max(F/N)": 0.5071624964685602,
      "MotionCritic": -4.867471218109131,
      "VeMo (human-opt view)": 0.9940746706552379,
      "VeMo (max entropy view)": 0.9914443375689769,
      "VeMo (min entropy view)": 0.9940746706552379,
      "VeMo (random view)": 0.9940746706552379,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person who is prone pushes himself up off the ground using his arms and props himself on his knees before standing awkwardly in a ready - to - wrestle position."
  },
  "013112": {
    "text": "a person grabs a small invisible object from his left and places it directly in front of them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.40626146549529035,
      "Minus Multimodal Distance": -6.64879846572876,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.394334271433763e-05,
      "MoBERT-F": 0.2905551561579374,
      "MoBERT-N": 0.42958816961291924,
      "MoBERT-min(F/N)": 0.2905551561579374,
      "MoBERT-max(F/N)": 0.42958816961291924,
      "MotionCritic": -2.90704607963562,
      "VeMo (human-opt view)": 0.7060998151571165,
      "VeMo (max entropy view)": 0.7060998151571165,
      "VeMo (min entropy view)": 0.7773722627737226,
      "VeMo (random view)": 0.7773722627737226,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person grabs a small invisible object from their left and places it directly in front of them."
  },
  "005347": {
    "text": "a person walks on a beam with arms straight up in the air, stops and lowers his arms.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4595857902037706,
      "Minus Multimodal Distance": -5.149868488311768,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 9.297794167650864e-05,
      "MoBERT-F": 0.5402955628737922,
      "MoBERT-N": 0.6065254327302638,
      "MoBERT-min(F/N)": 0.5402955628737922,
      "MoBERT-max(F/N)": 0.6065254327302638,
      "MotionCritic": -5.458587169647217,
      "VeMo (human-opt view)": 0.8076514346439958,
      "VeMo (max entropy view)": 0.8076514346439958,
      "VeMo (min entropy view)": 0.8086785009861933,
      "VeMo (random view)": 0.8076514346439958,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks on a beam with their arms straight up in the air, then stops and lowers their arms."
  },
  "011904": {
    "text": "a person sits down at a chair, then moves the chair around while they are still in it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1334442656621542,
      "Minus Multimodal Distance": -8.760936737060547,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00106268550734967,
      "MoBERT-F": 0.33610343727600345,
      "MoBERT-N": 0.34985460708170824,
      "MoBERT-min(F/N)": 0.33610343727600345,
      "MoBERT-max(F/N)": 0.34985460708170824,
      "MotionCritic": -7.874432563781738,
      "VeMo (human-opt view)": 0.0003790534049900044,
      "VeMo (max entropy view)": 0.0003790534049900044,
      "VeMo (min entropy view)": 4.139810141210107e-05,
      "VeMo (random view)": 4.139810141210107e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sits down on a chair, then moves the chair around while they are still in it."
  },
  "006980": {
    "text": "a person walks forward, steps over something with his right leg, and then he continues walking forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5238430088638704,
      "Minus Multimodal Distance": -3.381235122680664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4838704121066257e-05,
      "MoBERT-F": 0.3888781768388632,
      "MoBERT-N": 0.5650343442254199,
      "MoBERT-min(F/N)": 0.3888781768388632,
      "MoBERT-max(F/N)": 0.5650343442254199,
      "MotionCritic": -7.524036884307861,
      "VeMo (human-opt view)": 0.9146537842190016,
      "VeMo (max entropy view)": 0.9048843187660668,
      "VeMo (min entropy view)": 0.9146537842190016,
      "VeMo (random view)": 0.9146537842190016,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward, steps over something with his right leg, and then continues walking forward."
  },
  "010282": {
    "text": "a perso bend the knee and leand to the floor and start walked",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.0677861373230193,
      "Minus Multimodal Distance": -2.8673956394195557,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.47067493200302124,
      "MoBERT-F": 0.5333754174588132,
      "MoBERT-N": 0.5101691527526686,
      "MoBERT-min(F/N)": 0.5101691527526686,
      "MoBERT-max(F/N)": 0.5333754174588132,
      "MotionCritic": -6.01193380355835,
      "VeMo (human-opt view)": 0.9238171611868484,
      "VeMo (max entropy view)": 0.9238171611868484,
      "VeMo (min entropy view)": 0.9553170731707317,
      "VeMo (random view)": 0.9553170731707317,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person bends the knee, leans to the floor, and starts walking."
  },
  "012330": {
    "text": "a person crosses their arms, then has their arms angrily at their sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.20493349655126208,
      "Minus Multimodal Distance": -9.692473411560059,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.689824006869458e-05,
      "MoBERT-F": 0.30785388989073287,
      "MoBERT-N": 0.4470835808211413,
      "MoBERT-min(F/N)": 0.30785388989073287,
      "MoBERT-max(F/N)": 0.4470835808211413,
      "MotionCritic": -3.41253662109375,
      "VeMo (human-opt view)": 0.972302604381976,
      "VeMo (max entropy view)": 0.9098646034816248,
      "VeMo (min entropy view)": 0.972302604381976,
      "VeMo (random view)": 0.9098646034816248,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person crosses their arms, then puts their arms angrily at their sides."
  },
  "008484": {
    "text": "a person stretches their shoulders by moving their bent arms forward and backward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6192206079618781,
      "Minus Multimodal Distance": -10.437163352966309,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.285089330049232e-05,
      "MoBERT-F": 0.3272174285100965,
      "MoBERT-N": 0.3969195101999757,
      "MoBERT-min(F/N)": 0.3272174285100965,
      "MoBERT-max(F/N)": 0.3969195101999757,
      "MotionCritic": -6.592803001403809,
      "VeMo (human-opt view)": 0.4235294117647059,
      "VeMo (max entropy view)": 0.4235294117647059,
      "VeMo (min entropy view)": 0.5783132530120482,
      "VeMo (random view)": 0.4235294117647059,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stretches their shoulders by moving their bent arms forward and backward."
  },
  "003278": {
    "text": "a person marches forward aggressively than turns around and does the same.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7960087853538829,
      "Minus Multimodal Distance": -7.9296183586120605,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9128194451332092,
      "MoBERT-F": 0.7347739119654757,
      "MoBERT-N": 0.6890122518245388,
      "MoBERT-min(F/N)": 0.6890122518245388,
      "MoBERT-max(F/N)": 0.7347739119654757,
      "MotionCritic": -11.634435653686523,
      "VeMo (human-opt view)": 0.6933333333333334,
      "VeMo (max entropy view)": 0.4681933842239186,
      "VeMo (min entropy view)": 0.6933333333333334,
      "VeMo (random view)": 0.4681933842239186,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person marches forward aggressively, then turns around and does the same."
  },
  "009903": {
    "text": "a person begins to walk forward up the stairs",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.32688508342587563,
      "Minus Multimodal Distance": -12.221697807312012,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00033201088081113994,
      "MoBERT-F": 0.5323262314766728,
      "MoBERT-N": 0.6021872154062242,
      "MoBERT-min(F/N)": 0.5323262314766728,
      "MoBERT-max(F/N)": 0.6021872154062242,
      "MotionCritic": -4.441621780395508,
      "VeMo (human-opt view)": 0.17288135593220338,
      "VeMo (max entropy view)": 0.17288135593220338,
      "VeMo (min entropy view)": 0.16417910447761194,
      "VeMo (random view)": 0.16417910447761194,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person begins to walk forward up the stairs."
  },
  "009517": {
    "text": "this person dribbles with his right hand and shoots.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5515776913904488,
      "Minus Multimodal Distance": -9.429255485534668,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.893675809900742e-05,
      "MoBERT-F": 0.33051801009122805,
      "MoBERT-N": 0.41181806092585477,
      "MoBERT-min(F/N)": 0.33051801009122805,
      "MoBERT-max(F/N)": 0.41181806092585477,
      "MotionCritic": -1.3126639127731323,
      "VeMo (human-opt view)": 4.684496484887681e-05,
      "VeMo (max entropy view)": 0.0003053891995246231,
      "VeMo (min entropy view)": 4.684496484887681e-05,
      "VeMo (random view)": 0.0003053891995246231,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "This person dribbles with his right hand and shoots."
  },
  "009331": {
    "text": "a man moves something from one spot to another.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3693356869116468,
      "Minus Multimodal Distance": -6.008816719055176,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.479215956758708e-05,
      "MoBERT-F": 0.3387497788611772,
      "MoBERT-N": 0.36756050555331604,
      "MoBERT-min(F/N)": 0.3387497788611772,
      "MoBERT-max(F/N)": 0.36756050555331604,
      "MotionCritic": -5.007419109344482,
      "VeMo (human-opt view)": 0.7057010785824345,
      "VeMo (max entropy view)": 0.7057010785824345,
      "VeMo (min entropy view)": 0.7192982456140351,
      "VeMo (random view)": 0.7057010785824345,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man moves something from one spot to another."
  },
  "011972": {
    "text": "a figure steps backward cockily, swinging their arms",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5174215846956213,
      "Minus Multimodal Distance": -2.6929850578308105,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.983514666557312,
      "MoBERT-F": 0.7391270406799184,
      "MoBERT-N": 0.7007463019309983,
      "MoBERT-min(F/N)": 0.7007463019309983,
      "MoBERT-max(F/N)": 0.7391270406799184,
      "MotionCritic": -5.6072468757629395,
      "VeMo (human-opt view)": 0.07579717720857292,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.07579717720857292,
      "VeMo (random view)": 0.5,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A figure steps backward cockily, swinging their arms."
  },
  "009905": {
    "text": "a person sits cross legged then stands up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7522561774592974,
      "Minus Multimodal Distance": -6.996984004974365,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.07100158929824829,
      "MoBERT-F": 0.4567376118390303,
      "MoBERT-N": 0.4185814598190646,
      "MoBERT-min(F/N)": 0.4185814598190646,
      "MoBERT-max(F/N)": 0.4567376118390303,
      "MotionCritic": -4.953314781188965,
      "VeMo (human-opt view)": 0.9928709142267856,
      "VeMo (max entropy view)": 0.978352313532369,
      "VeMo (min entropy view)": 0.9928709142267856,
      "VeMo (random view)": 0.978352313532369,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits cross - legged and then stands up."
  },
  "013224": {
    "text": "the person uses their arms to warm up",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.38728057611391586,
      "Minus Multimodal Distance": -4.408439636230469,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.349765236431267e-05,
      "MoBERT-F": 0.4422412805736704,
      "MoBERT-N": 0.5071920119013548,
      "MoBERT-min(F/N)": 0.4422412805736704,
      "MoBERT-max(F/N)": 0.5071920119013548,
      "MotionCritic": -7.170125484466553,
      "VeMo (human-opt view)": 0.9896547969073287,
      "VeMo (max entropy view)": 0.987559070305719,
      "VeMo (min entropy view)": 0.9896547969073287,
      "VeMo (random view)": 0.9896547969073287,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person uses their arms to warm up."
  },
  "006577": {
    "text": "the person is stopping with their left foot.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.32763094325317305,
      "Minus Multimodal Distance": -4.59732723236084,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5001565518323332e-05,
      "MoBERT-F": 0.3537086171494623,
      "MoBERT-N": 0.46316447600668864,
      "MoBERT-min(F/N)": 0.3537086171494623,
      "MoBERT-max(F/N)": 0.46316447600668864,
      "MotionCritic": -9.587281227111816,
      "VeMo (human-opt view)": 0.40765391014975044,
      "VeMo (max entropy view)": 0.5319587628865979,
      "VeMo (min entropy view)": 0.40765391014975044,
      "VeMo (random view)": 0.5319587628865979,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is stopping with their left foot."
  },
  "005698": {
    "text": "a person swing with the legs and dance",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.218152519462496,
      "Minus Multimodal Distance": -10.612675666809082,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9584344029426575,
      "MoBERT-F": 0.7146491879254016,
      "MoBERT-N": 0.6869539370418536,
      "MoBERT-min(F/N)": 0.6869539370418536,
      "MoBERT-max(F/N)": 0.7146491879254016,
      "MotionCritic": -6.6550822257995605,
      "VeMo (human-opt view)": 0.9045362220717671,
      "VeMo (max entropy view)": 0.9045362220717671,
      "VeMo (min entropy view)": 0.9101251422070534,
      "VeMo (random view)": 0.9101251422070534,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person swings their legs and dances."
  },
  "001983": {
    "text": "a person walks and steps over something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8522866204398496,
      "Minus Multimodal Distance": -10.870743751525879,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.310664058313705e-05,
      "MoBERT-F": 0.3458713010022833,
      "MoBERT-N": 0.5567357214534271,
      "MoBERT-min(F/N)": 0.3458713010022833,
      "MoBERT-max(F/N)": 0.5567357214534271,
      "MotionCritic": -3.6386160850524902,
      "VeMo (human-opt view)": 0.9098712446351931,
      "VeMo (max entropy view)": 0.8931245745405038,
      "VeMo (min entropy view)": 0.9098712446351931,
      "VeMo (random view)": 0.9098712446351931,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks and steps over something."
  },
  "012543": {
    "text": "this person was forward then stops.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.43484833691304536,
      "Minus Multimodal Distance": -10.886119842529297,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 6.442660378525034e-05,
      "MoBERT-F": 0.34371042112641265,
      "MoBERT-N": 0.4593120360088838,
      "MoBERT-min(F/N)": 0.34371042112641265,
      "MoBERT-max(F/N)": 0.4593120360088838,
      "MotionCritic": -6.957882404327393,
      "VeMo (human-opt view)": 0.9850687364335986,
      "VeMo (max entropy view)": 0.8872037914691943,
      "VeMo (min entropy view)": 0.9850687364335986,
      "VeMo (random view)": 0.9850687364335986,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person was moving forward and then stopped."
  },
  "006371": {
    "text": "the person is doing arm gestures.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3237788415173028,
      "Minus Multimodal Distance": -5.247049331665039,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.709610246005468e-05,
      "MoBERT-F": 0.4385877800779599,
      "MoBERT-N": 0.453108620112092,
      "MoBERT-min(F/N)": 0.4385877800779599,
      "MoBERT-max(F/N)": 0.453108620112092,
      "MotionCritic": -6.163829326629639,
      "VeMo (human-opt view)": 0.9975279674864876,
      "VeMo (max entropy view)": 0.9941032647777938,
      "VeMo (min entropy view)": 0.9975279674864876,
      "VeMo (random view)": 0.9975279674864876,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is making arm gestures."
  },
  "001330": {
    "text": "a person uses right arm to hit down to the left and then the left arm crosses over the right side of the body.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.40865652128340263,
      "Minus Multimodal Distance": -7.047857761383057,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9307066142791882e-05,
      "MoBERT-F": 0.29665035231214504,
      "MoBERT-N": 0.39488084428619064,
      "MoBERT-min(F/N)": 0.29665035231214504,
      "MoBERT-max(F/N)": 0.39488084428619064,
      "MotionCritic": -3.112614154815674,
      "VeMo (human-opt view)": 0.7316017316017316,
      "VeMo (max entropy view)": 0.7316017316017316,
      "VeMo (min entropy view)": 0.7549407114624506,
      "VeMo (random view)": 0.7316017316017316,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person uses the right arm to hit down to the left, and then the left arm crosses over the right side of the body."
  },
  "012616": {
    "text": "a person walks in a clockwise circle while bent forward at the waist, and holding their right leg with both hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7772639010625685,
      "Minus Multimodal Distance": -7.042804718017578,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.985194052802399e-05,
      "MoBERT-F": 0.27763689061457875,
      "MoBERT-N": 0.47546065398807014,
      "MoBERT-min(F/N)": 0.27763689061457875,
      "MoBERT-max(F/N)": 0.47546065398807014,
      "MotionCritic": -12.350741386413574,
      "VeMo (human-opt view)": 0.591715976331361,
      "VeMo (max entropy view)": 0.591715976331361,
      "VeMo (min entropy view)": 0.5925925925925926,
      "VeMo (random view)": 0.591715976331361,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks in a clockwise circle while bent forward at the waist and holding their right leg with both hands."
  },
  "000088": {
    "text": "someone puts both of their hands on their chests and appears to be laughing. then waves their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.35262464115270653,
      "Minus Multimodal Distance": -9.476913452148438,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5801109586609527e-05,
      "MoBERT-F": 0.32349177004866025,
      "MoBERT-N": 0.43855109695796446,
      "MoBERT-min(F/N)": 0.32349177004866025,
      "MoBERT-max(F/N)": 0.43855109695796446,
      "MotionCritic": -5.513513088226318,
      "VeMo (human-opt view)": 0.7194570135746606,
      "VeMo (max entropy view)": 0.7194570135746606,
      "VeMo (min entropy view)": 0.00522318387121193,
      "VeMo (random view)": 0.7194570135746606,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone puts both hands on their chest and appears to be laughing. Then they wave their right hand."
  },
  "007089": {
    "text": "a person laying face down on the ground and then slowly crawling backwards",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7588452284815425,
      "Minus Multimodal Distance": -10.138238906860352,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.2029557228088379,
      "MoBERT-F": 0.49215757438450164,
      "MoBERT-N": 0.3953723715567695,
      "MoBERT-min(F/N)": 0.3953723715567695,
      "MoBERT-max(F/N)": 0.49215757438450164,
      "MotionCritic": -6.270021915435791,
      "VeMo (human-opt view)": 0.9724845995893224,
      "VeMo (max entropy view)": 0.9724845995893224,
      "VeMo (min entropy view)": 0.9740340414958379,
      "VeMo (random view)": 0.9724845995893224,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is lying face down on the ground and then slowly crawling backwards."
  },
  "009768": {
    "text": "a person side steps to the right and then to the left",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3718870890237822,
      "Minus Multimodal Distance": -10.515405654907227,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0005770598072558641,
      "MoBERT-F": 0.5178842240802219,
      "MoBERT-N": 0.6390553324581246,
      "MoBERT-min(F/N)": 0.5178842240802219,
      "MoBERT-max(F/N)": 0.6390553324581246,
      "MotionCritic": -4.025001525878906,
      "VeMo (human-opt view)": 0.960200041675349,
      "VeMo (max entropy view)": 0.9243633599391866,
      "VeMo (min entropy view)": 0.960200041675349,
      "VeMo (random view)": 0.960200041675349,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person side-steps to the right and then to the left."
  },
  "010068": {
    "text": "a person appears to be hitting a ball with their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6598764022167424,
      "Minus Multimodal Distance": -9.807443618774414,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.6543240892351605e-05,
      "MoBERT-F": 0.3870485239680433,
      "MoBERT-N": 0.4150762269372691,
      "MoBERT-min(F/N)": 0.3870485239680433,
      "MoBERT-max(F/N)": 0.4150762269372691,
      "MotionCritic": -4.601037502288818,
      "VeMo (human-opt view)": 0.0805398345668263,
      "VeMo (max entropy view)": 0.0805398345668263,
      "VeMo (min entropy view)": 0.0260462393912789,
      "VeMo (random view)": 0.0805398345668263,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person appears to be hitting a ball with their right hand."
  },
  "009041": {
    "text": "a person does a throwing motion with his right arm",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5710276435702323,
      "Minus Multimodal Distance": -3.0281569957733154,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9299345016479492,
      "MoBERT-F": 0.8275494292421672,
      "MoBERT-N": 0.8693759341710847,
      "MoBERT-min(F/N)": 0.8275494292421672,
      "MoBERT-max(F/N)": 0.8693759341710847,
      "MotionCritic": -7.858804702758789,
      "VeMo (human-opt view)": 0.9238171611868484,
      "VeMo (max entropy view)": 0.9238171611868484,
      "VeMo (min entropy view)": 0.9668085106382979,
      "VeMo (random view)": 0.9238171611868484,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person makes a throwing motion with their right arm."
  },
  "002755": {
    "text": "a person jogs on the spot, then stands still",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3930439965371977,
      "Minus Multimodal Distance": -10.933341979980469,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 9.749044693307951e-05,
      "MoBERT-F": 0.5870712433257894,
      "MoBERT-N": 0.7125037399654818,
      "MoBERT-min(F/N)": 0.5870712433257894,
      "MoBERT-max(F/N)": 0.7125037399654818,
      "MotionCritic": -7.6090922355651855,
      "VeMo (human-opt view)": 0.9360824742268041,
      "VeMo (max entropy view)": 0.6374695863746959,
      "VeMo (min entropy view)": 0.9360824742268041,
      "VeMo (random view)": 0.6374695863746959,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jogs on the spot and then stands still."
  },
  "009199": {
    "text": "the person is trying to talk with his hands.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.49411001285988493,
      "Minus Multimodal Distance": -6.4986572265625,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5037568775587715e-05,
      "MoBERT-F": 0.4347860067134288,
      "MoBERT-N": 0.43452866974147064,
      "MoBERT-min(F/N)": 0.43452866974147064,
      "MoBERT-max(F/N)": 0.4347860067134288,
      "MotionCritic": -4.017894268035889,
      "VeMo (human-opt view)": 0.9365128522762465,
      "VeMo (max entropy view)": 0.9048361934477379,
      "VeMo (min entropy view)": 0.9365128522762465,
      "VeMo (random view)": 0.9365128522762465,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is trying to talk with his hands."
  },
  "009961": {
    "text": "a person is practicing karate moves across the floor",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.2556632923284712,
      "Minus Multimodal Distance": -6.851938247680664,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.7532361602643505e-05,
      "MoBERT-F": 0.4632098794890879,
      "MoBERT-N": 0.5154989653897891,
      "MoBERT-min(F/N)": 0.4632098794890879,
      "MoBERT-max(F/N)": 0.5154989653897891,
      "MotionCritic": -6.445338249206543,
      "VeMo (human-opt view)": 0.9325899645210339,
      "VeMo (max entropy view)": 0.9146486701071854,
      "VeMo (min entropy view)": 0.9325899645210339,
      "VeMo (random view)": 0.9325899645210339,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is practicing karate moves across the floor."
  },
  "013164": {
    "text": "the person is standing with arms by their side",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4928063528884725,
      "Minus Multimodal Distance": -5.4219770431518555,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9516870199586265e-05,
      "MoBERT-F": 0.30278552067690384,
      "MoBERT-N": 0.4102819927501133,
      "MoBERT-min(F/N)": 0.30278552067690384,
      "MoBERT-max(F/N)": 0.4102819927501133,
      "MotionCritic": -7.430843830108643,
      "VeMo (human-opt view)": 0.994431471169391,
      "VeMo (max entropy view)": 0.9883058470764617,
      "VeMo (min entropy view)": 0.994431471169391,
      "VeMo (random view)": 0.9883058470764617,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is standing with their arms by their sides."
  },
  "005793": {
    "text": "the person is walking on a treadmill slowly.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4313474825952313,
      "Minus Multimodal Distance": -6.144248008728027,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.3025340851745568e-05,
      "MoBERT-F": 0.5068155282454009,
      "MoBERT-N": 0.5844642255679917,
      "MoBERT-min(F/N)": 0.5068155282454009,
      "MoBERT-max(F/N)": 0.5844642255679917,
      "MotionCritic": -9.023628234863281,
      "VeMo (human-opt view)": 0.6363636363636364,
      "VeMo (max entropy view)": 0.5778443113772455,
      "VeMo (min entropy view)": 0.6363636363636364,
      "VeMo (random view)": 0.5778443113772455,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking slowly on a treadmill."
  },
  "009730": {
    "text": "person is acting like a human monkey.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7812533726194113,
      "Minus Multimodal Distance": -5.816194534301758,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9258262515068054,
      "MoBERT-F": 0.7699520303048135,
      "MoBERT-N": 0.6568850692091863,
      "MoBERT-min(F/N)": 0.6568850692091863,
      "MoBERT-max(F/N)": 0.7699520303048135,
      "MotionCritic": -9.641539573669434,
      "VeMo (human-opt view)": 0.9048473967684022,
      "VeMo (max entropy view)": 0.9048473967684022,
      "VeMo (min entropy view)": 0.9363166953528399,
      "VeMo (random view)": 0.9363166953528399,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is acting like a human monkey."
  },
  "007095": {
    "text": "a person walks around obstacles.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8653970571262688,
      "Minus Multimodal Distance": -9.431938171386719,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.08821253478527069,
      "MoBERT-F": 0.5128640462239555,
      "MoBERT-N": 0.5053359100163808,
      "MoBERT-min(F/N)": 0.5053359100163808,
      "MoBERT-max(F/N)": 0.5128640462239555,
      "MotionCritic": -1.7551597356796265,
      "VeMo (human-opt view)": 0.09043927648578812,
      "VeMo (max entropy view)": 0.11906400550584996,
      "VeMo (min entropy view)": 0.09043927648578812,
      "VeMo (random view)": 0.11906400550584996,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks around obstacles."
  },
  "008708": {
    "text": "walking backwards and stopping.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.21787002675918912,
      "Minus Multimodal Distance": -5.281975746154785,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9947870969772339,
      "MoBERT-F": 0.6546783804249243,
      "MoBERT-N": 0.5723536142535519,
      "MoBERT-min(F/N)": 0.5723536142535519,
      "MoBERT-max(F/N)": 0.6546783804249243,
      "MotionCritic": -5.691346645355225,
      "VeMo (human-opt view)": 0.8356374807987711,
      "VeMo (max entropy view)": 0.8356374807987711,
      "VeMo (min entropy view)": 0.9831029185867896,
      "VeMo (random view)": 0.8356374807987711,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking backwards and stopping."
  },
  "000827": {
    "text": "a man walks foward ,makes a u turn to the right side ,walks and then stops.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.751148648592239,
      "Minus Multimodal Distance": -4.907064914703369,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.143307523918338e-05,
      "MoBERT-F": 0.2705916514603959,
      "MoBERT-N": 0.4227643220884097,
      "MoBERT-min(F/N)": 0.2705916514603959,
      "MoBERT-max(F/N)": 0.4227643220884097,
      "MotionCritic": -1.5001981258392334,
      "VeMo (human-opt view)": 0.9323692045937899,
      "VeMo (max entropy view)": 0.9323692045937899,
      "VeMo (min entropy view)": 0.9466019417475728,
      "VeMo (random view)": 0.9323692045937899,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward, makes a U-turn to the right side, walks, and then stops."
  },
  "004293": {
    "text": "a person confidently walks down an aisle while stretching/loosening up their arm.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.420167226281122,
      "Minus Multimodal Distance": -5.442166328430176,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.0502433219226077e-05,
      "MoBERT-F": 0.4593837550032762,
      "MoBERT-N": 0.447487543225048,
      "MoBERT-min(F/N)": 0.447487543225048,
      "MoBERT-max(F/N)": 0.4593837550032762,
      "MotionCritic": -10.325204849243164,
      "VeMo (human-opt view)": 0.9769706540334254,
      "VeMo (max entropy view)": 0.96256,
      "VeMo (min entropy view)": 0.9769706540334254,
      "VeMo (random view)": 0.9769706540334254,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person confidently walks down an aisle while stretching or loosening up their arm."
  },
  "013286": {
    "text": "a person stands in place and then steps sideways to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.15583052629334093,
      "Minus Multimodal Distance": -5.988290309906006,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.64276623056503e-05,
      "MoBERT-F": 0.40514600512701693,
      "MoBERT-N": 0.46674133501361026,
      "MoBERT-min(F/N)": 0.40514600512701693,
      "MoBERT-max(F/N)": 0.46674133501361026,
      "MotionCritic": -9.933000564575195,
      "VeMo (human-opt view)": 0.9819830291758689,
      "VeMo (max entropy view)": 0.9604957397366383,
      "VeMo (min entropy view)": 0.9819830291758689,
      "VeMo (random view)": 0.9819830291758689,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands in place and then steps sideways to the left."
  },
  "001473": {
    "text": "a person sits in a  chair then stands back up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6777639985346708,
      "Minus Multimodal Distance": -8.777734756469727,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.1515379101037979,
      "MoBERT-F": 0.4797749013299648,
      "MoBERT-N": 0.4381655830383766,
      "MoBERT-min(F/N)": 0.4381655830383766,
      "MoBERT-max(F/N)": 0.4797749013299648,
      "MotionCritic": -10.069735527038574,
      "VeMo (human-opt view)": 0.48360655737704916,
      "VeMo (max entropy view)": 0.48360655737704916,
      "VeMo (min entropy view)": 0.43737166324435317,
      "VeMo (random view)": 0.43737166324435317,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits in a chair and then stands back up."
  },
  "011731": {
    "text": "a person raised the hands and pull it down",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.33160898376209275,
      "Minus Multimodal Distance": -6.675119400024414,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.5734308261889964e-05,
      "MoBERT-F": 0.3110664470088257,
      "MoBERT-N": 0.3789144739875425,
      "MoBERT-min(F/N)": 0.3110664470088257,
      "MoBERT-max(F/N)": 0.3789144739875425,
      "MotionCritic": -12.296548843383789,
      "VeMo (human-opt view)": 0.986784140969163,
      "VeMo (max entropy view)": 0.986784140969163,
      "VeMo (min entropy view)": 0.9964162810015427,
      "VeMo (random view)": 0.986784140969163,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raised the hands and pulled them down."
  },
  "002517": {
    "text": "the figure stays put but moves arms upward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3269971164416666,
      "Minus Multimodal Distance": -8.962077140808105,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9507022190955468e-05,
      "MoBERT-F": 0.4534652319993537,
      "MoBERT-N": 0.5134989216971586,
      "MoBERT-min(F/N)": 0.4534652319993537,
      "MoBERT-max(F/N)": 0.5134989216971586,
      "MotionCritic": -8.63299560546875,
      "VeMo (human-opt view)": 0.9740082079343365,
      "VeMo (max entropy view)": 0.9628318584070796,
      "VeMo (min entropy view)": 0.9740082079343365,
      "VeMo (random view)": 0.9740082079343365,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The figure stays put but moves its arms upward."
  },
  "013648": {
    "text": "moving hands from side to side above head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5058233580633988,
      "Minus Multimodal Distance": -4.687927722930908,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5063322027563117e-05,
      "MoBERT-F": 0.3937971665974908,
      "MoBERT-N": 0.4490879292804658,
      "MoBERT-min(F/N)": 0.3937971665974908,
      "MoBERT-max(F/N)": 0.4490879292804658,
      "MotionCritic": 0.4100857675075531,
      "VeMo (human-opt view)": 0.9467084639498433,
      "VeMo (max entropy view)": 0.9284928492849285,
      "VeMo (min entropy view)": 0.9467084639498433,
      "VeMo (random view)": 0.9284928492849285,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is moving hands from side to side above the head."
  },
  "006576": {
    "text": "person is getting into a crouch position.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9796504755091413,
      "Minus Multimodal Distance": -4.679410934448242,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9587776064872742,
      "MoBERT-F": 0.6381297326686984,
      "MoBERT-N": 0.5491482418927482,
      "MoBERT-min(F/N)": 0.5491482418927482,
      "MoBERT-max(F/N)": 0.6381297326686984,
      "MotionCritic": -7.999716281890869,
      "VeMo (human-opt view)": 0.9979559978956265,
      "VeMo (max entropy view)": 0.9953712275504536,
      "VeMo (min entropy view)": 0.9979559978956265,
      "VeMo (random view)": 0.9953712275504536,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is getting into a crouching position."
  },
  "011255": {
    "text": "a man lifts his right arm up",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6769630703931989,
      "Minus Multimodal Distance": -13.538946151733398,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.565087485592812e-05,
      "MoBERT-F": 0.2975834968627819,
      "MoBERT-N": 0.39021046895957695,
      "MoBERT-min(F/N)": 0.2975834968627819,
      "MoBERT-max(F/N)": 0.39021046895957695,
      "MotionCritic": -10.82457160949707,
      "VeMo (human-opt view)": 0.9941032647777938,
      "VeMo (max entropy view)": 0.9840841650930672,
      "VeMo (min entropy view)": 0.9941032647777938,
      "VeMo (random view)": 0.9840841650930672,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man lifts his right arm up."
  },
  "004960": {
    "text": "person is adjusting something on their head",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4863026913146058,
      "Minus Multimodal Distance": -5.612281322479248,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4037946786847897e-05,
      "MoBERT-F": 0.43327804778335915,
      "MoBERT-N": 0.45841131425210596,
      "MoBERT-min(F/N)": 0.43327804778335915,
      "MoBERT-max(F/N)": 0.45841131425210596,
      "MotionCritic": -2.134890079498291,
      "VeMo (human-opt view)": 0.9783037475345168,
      "VeMo (max entropy view)": 0.9783037475345168,
      "VeMo (min entropy view)": 0.9959244164505372,
      "VeMo (random view)": 0.9959244164505372,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is adjusting something on their head."
  },
  "005433": {
    "text": "person sits on floor with legs crossed",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5352576502338969,
      "Minus Multimodal Distance": -2.251866579055786,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.5705562829971313,
      "MoBERT-F": 0.5384367360550992,
      "MoBERT-N": 0.5930444177313473,
      "MoBERT-min(F/N)": 0.5384367360550992,
      "MoBERT-max(F/N)": 0.5930444177313473,
      "MotionCritic": -4.872392177581787,
      "VeMo (human-opt view)": 0.8077858880778589,
      "VeMo (max entropy view)": 0.6666666666666666,
      "VeMo (min entropy view)": 0.8077858880778589,
      "VeMo (random view)": 0.8077858880778589,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits on the floor with their legs crossed."
  },
  "009383": {
    "text": "a person stayed on the place",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.357740482677138,
      "Minus Multimodal Distance": -3.0576930046081543,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.276241841376759e-05,
      "MoBERT-F": 0.2971974147763755,
      "MoBERT-N": 0.44825550691478677,
      "MoBERT-min(F/N)": 0.2971974147763755,
      "MoBERT-max(F/N)": 0.44825550691478677,
      "MotionCritic": -8.394315719604492,
      "VeMo (human-opt view)": 0.93270960777258,
      "VeMo (max entropy view)": 0.93270960777258,
      "VeMo (min entropy view)": 0.9819791988466687,
      "VeMo (random view)": 0.9819791988466687,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person stayed in place."
  },
  "003784": {
    "text": "a man walks in clockwise direction.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4624905558551347,
      "Minus Multimodal Distance": -5.42498254776001,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.000446286634542048,
      "MoBERT-F": 0.45479275565375843,
      "MoBERT-N": 0.5532331798363612,
      "MoBERT-min(F/N)": 0.45479275565375843,
      "MoBERT-max(F/N)": 0.5532331798363612,
      "MotionCritic": -11.655030250549316,
      "VeMo (human-opt view)": 0.8602150537634409,
      "VeMo (max entropy view)": 0.8602150537634409,
      "VeMo (min entropy view)": 0.8806963645673324,
      "VeMo (random view)": 0.8602150537634409,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks in a clockwise direction."
  },
  "012655": {
    "text": "the toon jogs in place.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.37835719073331836,
      "Minus Multimodal Distance": -9.495081901550293,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0020822761580348015,
      "MoBERT-F": 0.6241395690687679,
      "MoBERT-N": 0.6827306209992315,
      "MoBERT-min(F/N)": 0.6241395690687679,
      "MoBERT-max(F/N)": 0.6827306209992315,
      "MotionCritic": -7.5876288414001465,
      "VeMo (human-opt view)": 0.8521303258145363,
      "VeMo (max entropy view)": 0.3774104683195592,
      "VeMo (min entropy view)": 0.8521303258145363,
      "VeMo (random view)": 0.3774104683195592,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The toon jogs in place."
  },
  "009941": {
    "text": "excited person starts with arms out and bounces from one foot to the other while clapping.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6187023764432947,
      "Minus Multimodal Distance": -3.8317902088165283,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00013618079537991434,
      "MoBERT-F": 0.5056179327107153,
      "MoBERT-N": 0.5156725036740215,
      "MoBERT-min(F/N)": 0.5056179327107153,
      "MoBERT-max(F/N)": 0.5156725036740215,
      "MotionCritic": -5.737061977386475,
      "VeMo (human-opt view)": 0.5154185022026432,
      "VeMo (max entropy view)": 0.5154185022026432,
      "VeMo (min entropy view)": 0.40691927512355847,
      "VeMo (random view)": 0.5154185022026432,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "An excited person starts with their arms out and bounces from one foot to the other while clapping."
  },
  "014342": {
    "text": "stick man does fighting movements kicks in the air punches the air and stops",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3256706912640486,
      "Minus Multimodal Distance": -11.514566421508789,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8626978993415833,
      "MoBERT-F": 0.7008310589415343,
      "MoBERT-N": 0.6416700234217818,
      "MoBERT-min(F/N)": 0.6416700234217818,
      "MoBERT-max(F/N)": 0.7008310589415343,
      "MotionCritic": -8.160160064697266,
      "VeMo (human-opt view)": 0.9953778095181416,
      "VeMo (max entropy view)": 0.9919429239190602,
      "VeMo (min entropy view)": 0.9953778095181416,
      "VeMo (random view)": 0.9953778095181416,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man does fighting movements, kicks in the air, punches the air and stops."
  },
  "006235": {
    "text": "the man does a wavelike dance using his arms and slightly sways.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4376035427781135,
      "Minus Multimodal Distance": -12.006606101989746,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.02653237245976925,
      "MoBERT-F": 0.5653461031084133,
      "MoBERT-N": 0.5036972960836149,
      "MoBERT-min(F/N)": 0.5036972960836149,
      "MoBERT-max(F/N)": 0.5653461031084133,
      "MotionCritic": -11.301929473876953,
      "VeMo (human-opt view)": 0.8522727272727273,
      "VeMo (max entropy view)": 0.8522727272727273,
      "VeMo (min entropy view)": 0.9099041533546326,
      "VeMo (random view)": 0.9099041533546326,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man performs a wavelike dance, using his arms and swaying slightly."
  },
  "000352": {
    "text": "person looks like theyre holding a microphone and talking into it",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5112303593030721,
      "Minus Multimodal Distance": -5.830453872680664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.9883692554431036e-05,
      "MoBERT-F": 0.3079804277605482,
      "MoBERT-N": 0.3634295894630249,
      "MoBERT-min(F/N)": 0.3079804277605482,
      "MoBERT-max(F/N)": 0.3634295894630249,
      "MotionCritic": -6.586854457855225,
      "VeMo (human-opt view)": 0.16396396396396395,
      "VeMo (max entropy view)": 0.16396396396396395,
      "VeMo (min entropy view)": 0.007552109555935958,
      "VeMo (random view)": 0.007552109555935958,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person looks like they're holding a microphone and talking into it."
  },
  "013512": {
    "text": "while squatting, a person makes circular gestures with their right hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8816026269869005,
      "Minus Multimodal Distance": -12.776005744934082,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.9249214245937765e-05,
      "MoBERT-F": 0.3765972625318899,
      "MoBERT-N": 0.3992379502855937,
      "MoBERT-min(F/N)": 0.3765972625318899,
      "MoBERT-max(F/N)": 0.3992379502855937,
      "MotionCritic": -6.602841854095459,
      "VeMo (human-opt view)": 0.835820895522388,
      "VeMo (max entropy view)": 0.835820895522388,
      "VeMo (min entropy view)": 0.8439716312056738,
      "VeMo (random view)": 0.8439716312056738,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "While squatting, a person makes circular gestures with their right hand."
  },
  "004819": {
    "text": "a person is making a high kick with his left leg.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.38193893859903044,
      "Minus Multimodal Distance": -10.57089900970459,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9954516291618347,
      "MoBERT-F": 0.8100570893918968,
      "MoBERT-N": 0.7420531686318323,
      "MoBERT-min(F/N)": 0.7420531686318323,
      "MoBERT-max(F/N)": 0.8100570893918968,
      "MotionCritic": -7.7666120529174805,
      "VeMo (human-opt view)": 0.9467107680999632,
      "VeMo (max entropy view)": 0.8871595330739299,
      "VeMo (min entropy view)": 0.9467107680999632,
      "VeMo (random view)": 0.8871595330739299,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is making a high kick with their left leg."
  },
  "014010": {
    "text": "a man walks forward with arms relatively stiff at sides.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.32655083721865985,
      "Minus Multimodal Distance": -6.252463340759277,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.226019180146977e-05,
      "MoBERT-F": 0.3461132772678321,
      "MoBERT-N": 0.46457942252713574,
      "MoBERT-min(F/N)": 0.3461132772678321,
      "MoBERT-max(F/N)": 0.46457942252713574,
      "MotionCritic": -2.6283366680145264,
      "VeMo (human-opt view)": 0.9831129196337741,
      "VeMo (max entropy view)": 0.9724529951902056,
      "VeMo (min entropy view)": 0.9831129196337741,
      "VeMo (random view)": 0.9724529951902056,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks forward with his arms relatively stiff at his sides."
  },
  "010441": {
    "text": "the person takes a swing with the baseball bat",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8640633580125796,
      "Minus Multimodal Distance": -3.6991519927978516,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.23079189658164978,
      "MoBERT-F": 0.716279115309757,
      "MoBERT-N": 0.6142717291691328,
      "MoBERT-min(F/N)": 0.6142717291691328,
      "MoBERT-max(F/N)": 0.716279115309757,
      "MotionCritic": -5.200319766998291,
      "VeMo (human-opt view)": 0.02440968416857627,
      "VeMo (max entropy view)": 0.02440968416857627,
      "VeMo (min entropy view)": 0.00011249793202330839,
      "VeMo (random view)": 0.02440968416857627,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person takes a swing with the baseball bat."
  },
  "006304": {
    "text": "a person is crouched down and walking around sneakily.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.4250497430674545,
      "Minus Multimodal Distance": -3.9253480434417725,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00020197397680021822,
      "MoBERT-F": 0.43301502763436395,
      "MoBERT-N": 0.4941916378406571,
      "MoBERT-min(F/N)": 0.43301502763436395,
      "MoBERT-max(F/N)": 0.4941916378406571,
      "MotionCritic": -9.047252655029297,
      "VeMo (human-opt view)": 0.9796839729119639,
      "VeMo (max entropy view)": 0.9577842305115032,
      "VeMo (min entropy view)": 0.9796839729119639,
      "VeMo (random view)": 0.9577842305115032,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is crouched down and walking around sneakily."
  },
  "008354": {
    "text": "a person lightly kicks an object on the ground",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4479754409724271,
      "Minus Multimodal Distance": -6.140541076660156,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9555617570877075,
      "MoBERT-F": 0.8514555606473904,
      "MoBERT-N": 0.6807702685795464,
      "MoBERT-min(F/N)": 0.6807702685795464,
      "MoBERT-max(F/N)": 0.8514555606473904,
      "MotionCritic": -4.253085613250732,
      "VeMo (human-opt view)": 0.9398847104035136,
      "VeMo (max entropy view)": 0.9198926791874281,
      "VeMo (min entropy view)": 0.9398847104035136,
      "VeMo (random view)": 0.9198926791874281,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lightly kicks an object on the ground."
  },
  "002668": {
    "text": "a person has both hands on his legs.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.12339865300109004,
      "Minus Multimodal Distance": -6.139645099639893,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.820326234446838e-05,
      "MoBERT-F": 0.29579043143677697,
      "MoBERT-N": 0.4092796712144242,
      "MoBERT-min(F/N)": 0.29579043143677697,
      "MoBERT-max(F/N)": 0.4092796712144242,
      "MotionCritic": -11.693314552307129,
      "VeMo (human-opt view)": 0.939768050879162,
      "VeMo (max entropy view)": 0.7546468401486989,
      "VeMo (min entropy view)": 0.939768050879162,
      "VeMo (random view)": 0.7546468401486989,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person has both hands on their legs."
  },
  "001523": {
    "text": "a person feeling the back of their head.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.19821598905530907,
      "Minus Multimodal Distance": -6.395053863525391,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.9083495974191464e-05,
      "MoBERT-F": 0.36111266431687433,
      "MoBERT-N": 0.38652078094694686,
      "MoBERT-min(F/N)": 0.36111266431687433,
      "MoBERT-max(F/N)": 0.38652078094694686,
      "MotionCritic": -8.002256393432617,
      "VeMo (human-opt view)": 0.2558139534883721,
      "VeMo (max entropy view)": 0.2558139534883721,
      "VeMo (min entropy view)": 0.9783743475018642,
      "VeMo (random view)": 0.2558139534883721,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is feeling the back of their head."
  },
  "000847": {
    "text": "the person is walking knees down line crawling.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0583594331020978,
      "Minus Multimodal Distance": -8.875375747680664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8868738412857056,
      "MoBERT-F": 0.6810585343006148,
      "MoBERT-N": 0.5780730651368522,
      "MoBERT-min(F/N)": 0.5780730651368522,
      "MoBERT-max(F/N)": 0.6810585343006148,
      "MotionCritic": -6.810642719268799,
      "VeMo (human-opt view)": 0.899165061014772,
      "VeMo (max entropy view)": 0.880648899188876,
      "VeMo (min entropy view)": 0.899165061014772,
      "VeMo (random view)": 0.880648899188876,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking, knees down, crawling along a line."
  },
  "012215": {
    "text": "the person is walking slowly like a monster.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5345501233327599,
      "Minus Multimodal Distance": -3.552023410797119,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.456743434071541e-05,
      "MoBERT-F": 0.3415828518846433,
      "MoBERT-N": 0.48723836101959106,
      "MoBERT-min(F/N)": 0.3415828518846433,
      "MoBERT-max(F/N)": 0.48723836101959106,
      "MotionCritic": -3.529505491256714,
      "VeMo (human-opt view)": 0.6793893129770993,
      "VeMo (max entropy view)": 0.5921052631578947,
      "VeMo (min entropy view)": 0.6793893129770993,
      "VeMo (random view)": 0.6793893129770993,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking slowly, like a monster."
  },
  "012686": {
    "text": "laying down on face and crawling backwards.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7193378122075358,
      "Minus Multimodal Distance": -7.270498752593994,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.3355128765106201,
      "MoBERT-F": 0.5514511087306847,
      "MoBERT-N": 0.5078048342008513,
      "MoBERT-min(F/N)": 0.5078048342008513,
      "MoBERT-max(F/N)": 0.5514511087306847,
      "MotionCritic": -9.212058067321777,
      "VeMo (human-opt view)": 0.8990694345025053,
      "VeMo (max entropy view)": 0.7541766109785203,
      "VeMo (min entropy view)": 0.8990694345025053,
      "VeMo (random view)": 0.8990694345025053,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is lying down on their face and crawling backwards."
  },
  "006674": {
    "text": "walking forward in a diagonal line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7569645731503247,
      "Minus Multimodal Distance": -5.455787181854248,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.019748666905798e-05,
      "MoBERT-F": 0.5263823574381719,
      "MoBERT-N": 0.5580793292338644,
      "MoBERT-min(F/N)": 0.5263823574381719,
      "MoBERT-max(F/N)": 0.5580793292338644,
      "MotionCritic": -1.3730695247650146,
      "VeMo (human-opt view)": 0.9496204278812974,
      "VeMo (max entropy view)": 0.9496204278812974,
      "VeMo (min entropy view)": 0.9740501478156137,
      "VeMo (random view)": 0.9740501478156137,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking forward in a diagonal line."
  },
  "013567": {
    "text": "person is performing dance moves",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.4093280625618896,
      "Minus Multimodal Distance": -4.391321659088135,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5719158053398132,
      "MoBERT-F": 0.7145963741381205,
      "MoBERT-N": 0.6246872459828403,
      "MoBERT-min(F/N)": 0.6246872459828403,
      "MoBERT-max(F/N)": 0.7145963741381205,
      "MotionCritic": -6.865739822387695,
      "VeMo (human-opt view)": 0.9875914731148584,
      "VeMo (max entropy view)": 0.9875914731148584,
      "VeMo (min entropy view)": 0.9937054133445237,
      "VeMo (random view)": 0.9937054133445237,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is performing dance moves."
  },
  "011458": {
    "text": "a man repeatedly uses his right arm to reach for something slightly in front of him, back again, then looks to be stirring something.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.270675300454643,
      "Minus Multimodal Distance": -13.15103530883789,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.596352689783089e-05,
      "MoBERT-F": 0.28308793466658405,
      "MoBERT-N": 0.37229461100391187,
      "MoBERT-min(F/N)": 0.28308793466658405,
      "MoBERT-max(F/N)": 0.37229461100391187,
      "MotionCritic": -6.903107166290283,
      "VeMo (human-opt view)": 0.755056179775281,
      "VeMo (max entropy view)": 0.755056179775281,
      "VeMo (min entropy view)": 0.7551401869158878,
      "VeMo (random view)": 0.755056179775281,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man repeatedly reaches forward with his right arm for something slightly in front of himself, pulls his arm back again, and then looks to be stirring something."
  },
  "009519": {
    "text": "a person standing scratches his head with his right hand and then lowers his hand back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.22449600156928923,
      "Minus Multimodal Distance": -9.818310737609863,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.313057797844522e-05,
      "MoBERT-F": 0.274331980345736,
      "MoBERT-N": 0.368136768848829,
      "MoBERT-min(F/N)": 0.274331980345736,
      "MoBERT-max(F/N)": 0.368136768848829,
      "MotionCritic": -3.1775734424591064,
      "VeMo (human-opt view)": 0.9151614668856048,
      "VeMo (max entropy view)": 0.9151614668856048,
      "VeMo (min entropy view)": 0.9970065367462887,
      "VeMo (random view)": 0.9970065367462887,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person standing scratches his head with his right hand and then lowers his hand back down."
  },
  "006432": {
    "text": "a man raises his right hand to his head then lowers it back down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.26018734640378927,
      "Minus Multimodal Distance": -5.957237243652344,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.77547912648879e-05,
      "MoBERT-F": 0.2785933445823131,
      "MoBERT-N": 0.3912626933818636,
      "MoBERT-min(F/N)": 0.2785933445823131,
      "MoBERT-max(F/N)": 0.3912626933818636,
      "MotionCritic": -6.037551403045654,
      "VeMo (human-opt view)": 0.9976892567684084,
      "VeMo (max entropy view)": 0.9968238455900318,
      "VeMo (min entropy view)": 0.9976892567684084,
      "VeMo (random view)": 0.9968238455900318,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man raises his right hand to his head, then lowers it back down."
  },
  "011471": {
    "text": "a person sits on the ledge of something then gets off and walks away.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.3673017355094044,
      "Minus Multimodal Distance": -2.2080607414245605,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9798000454902649,
      "MoBERT-F": 0.6307089122160601,
      "MoBERT-N": 0.4783948534990971,
      "MoBERT-min(F/N)": 0.4783948534990971,
      "MoBERT-max(F/N)": 0.6307089122160601,
      "MotionCritic": -7.280084609985352,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.5930232558139535,
      "VeMo (random view)": 0.5930232558139535,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person sits on the ledge of something, then gets off and walks away."
  },
  "005712": {
    "text": "sitting down crisscrossed, the right arm chucks forward and chucks forward again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.801216296800379,
      "Minus Multimodal Distance": -3.76686692237854,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 8.73367753229104e-05,
      "MoBERT-F": 0.3516080186768662,
      "MoBERT-N": 0.4227828095937049,
      "MoBERT-min(F/N)": 0.3516080186768662,
      "MoBERT-max(F/N)": 0.4227828095937049,
      "MotionCritic": -6.917600154876709,
      "VeMo (human-opt view)": 0.6797066014669927,
      "VeMo (max entropy view)": 0.4845605700712589,
      "VeMo (min entropy view)": 0.6797066014669927,
      "VeMo (random view)": 0.4845605700712589,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person sitting down crisscrossed, the right arm thrusts forward and thrusts forward again."
  },
  "005904": {
    "text": "a person does four jumping jacks then two forward leg jumps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5665896995765562,
      "Minus Multimodal Distance": -3.132066488265991,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.843848466873169,
      "MoBERT-F": 0.7958020906140476,
      "MoBERT-N": 0.8595833521200229,
      "MoBERT-min(F/N)": 0.7958020906140476,
      "MoBERT-max(F/N)": 0.8595833521200229,
      "MotionCritic": -4.596532821655273,
      "VeMo (human-opt view)": 0.8739946380697051,
      "VeMo (max entropy view)": 0.8597899938233478,
      "VeMo (min entropy view)": 0.8739946380697051,
      "VeMo (random view)": 0.8597899938233478,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person does four jumping jacks, then two forward leg jumps."
  },
  "012992": {
    "text": "the person is making up and down hand gestures.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.980407565495876,
      "Minus Multimodal Distance": -10.121830940246582,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.9953271223348565e-05,
      "MoBERT-F": 0.30922132030899513,
      "MoBERT-N": 0.3682284976471785,
      "MoBERT-min(F/N)": 0.30922132030899513,
      "MoBERT-max(F/N)": 0.3682284976471785,
      "MotionCritic": -10.018516540527344,
      "VeMo (human-opt view)": 0.5160142348754448,
      "VeMo (max entropy view)": 0.5160142348754448,
      "VeMo (min entropy view)": 0.5319148936170213,
      "VeMo (random view)": 0.5319148936170213,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is making up - and - down hand gestures."
  },
  "010928": {
    "text": "this person kicks with his right foot while standing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3260553963464619,
      "Minus Multimodal Distance": -11.050882339477539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.982914388179779,
      "MoBERT-F": 0.8533004213319205,
      "MoBERT-N": 0.68965065901711,
      "MoBERT-min(F/N)": 0.68965065901711,
      "MoBERT-max(F/N)": 0.8533004213319205,
      "MotionCritic": -4.844234943389893,
      "VeMo (human-opt view)": 0.9740895089690161,
      "VeMo (max entropy view)": 0.95276266819353,
      "VeMo (min entropy view)": 0.9740895089690161,
      "VeMo (random view)": 0.9740895089690161,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "This person kicks with his right foot while standing."
  },
  "003664": {
    "text": "a person takes off their t-shirt with one arm.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2591502568030662,
      "Minus Multimodal Distance": -12.968027114868164,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.400143734528683e-05,
      "MoBERT-F": 0.32036958977345803,
      "MoBERT-N": 0.35367527273252575,
      "MoBERT-min(F/N)": 0.32036958977345803,
      "MoBERT-max(F/N)": 0.35367527273252575,
      "MotionCritic": -5.86523962020874,
      "VeMo (human-opt view)": 2.598349462080849e-05,
      "VeMo (max entropy view)": 7.049300155888886e-05,
      "VeMo (min entropy view)": 2.598349462080849e-05,
      "VeMo (random view)": 2.598349462080849e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person takes off their T - shirt with one arm."
  },
  "009946": {
    "text": "a person does a swinging golf club motion",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8118889685186975,
      "Minus Multimodal Distance": -6.653295516967773,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0032304588239639997,
      "MoBERT-F": 0.7300262159619362,
      "MoBERT-N": 0.6678955888485396,
      "MoBERT-min(F/N)": 0.6678955888485396,
      "MoBERT-max(F/N)": 0.7300262159619362,
      "MotionCritic": -8.27269172668457,
      "VeMo (human-opt view)": 0.7427652733118971,
      "VeMo (max entropy view)": 0.6934306569343066,
      "VeMo (min entropy view)": 0.7427652733118971,
      "VeMo (random view)": 0.7427652733118971,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person makes a swinging motion with a golf club."
  },
  "010795": {
    "text": "a person starts a jogging on the place",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2975791047801264,
      "Minus Multimodal Distance": -13.150493621826172,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00013216391380410641,
      "MoBERT-F": 0.5595778370618485,
      "MoBERT-N": 0.5949790037299942,
      "MoBERT-min(F/N)": 0.5595778370618485,
      "MoBERT-max(F/N)": 0.5949790037299942,
      "MotionCritic": -9.753243446350098,
      "VeMo (human-opt view)": 0.9495822529530394,
      "VeMo (max entropy view)": 0.40714285714285714,
      "VeMo (min entropy view)": 0.9495822529530394,
      "VeMo (random view)": 0.9495822529530394,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person starts jogging in the place."
  },
  "008694": {
    "text": "a person walks over and across a beam.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6373609708190306,
      "Minus Multimodal Distance": -4.1613993644714355,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.544436185620725e-05,
      "MoBERT-F": 0.29409997330865567,
      "MoBERT-N": 0.4708965544757911,
      "MoBERT-min(F/N)": 0.29409997330865567,
      "MoBERT-max(F/N)": 0.4708965544757911,
      "MotionCritic": -2.2422244548797607,
      "VeMo (human-opt view)": 0.5166051660516605,
      "VeMo (max entropy view)": 0.5166051660516605,
      "VeMo (min entropy view)": 0.453125,
      "VeMo (random view)": 0.453125,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks across a beam."
  },
  "012023": {
    "text": "a person walks while holding a hand rail",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4445195518479653,
      "Minus Multimodal Distance": -5.404397964477539,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.770265074213967e-05,
      "MoBERT-F": 0.2859496177872034,
      "MoBERT-N": 0.4109170824969658,
      "MoBERT-min(F/N)": 0.2859496177872034,
      "MoBERT-max(F/N)": 0.4109170824969658,
      "MotionCritic": -4.390252590179443,
      "VeMo (human-opt view)": 6.0002873280446226e-05,
      "VeMo (max entropy view)": 0.00011286049523185307,
      "VeMo (min entropy view)": 6.0002873280446226e-05,
      "VeMo (random view)": 0.00011286049523185307,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks while holding a handrail."
  },
  "003685": {
    "text": "ski mountain lodge  go down the mountain slowly",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3667069090640575,
      "Minus Multimodal Distance": -9.587812423706055,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.1522584069753066e-05,
      "MoBERT-F": 0.28837448574516655,
      "MoBERT-N": 0.3816785492731981,
      "MoBERT-min(F/N)": 0.28837448574516655,
      "MoBERT-max(F/N)": 0.3816785492731981,
      "MotionCritic": 0.08780821412801743,
      "VeMo (human-opt view)": 0.0001268547362415753,
      "VeMo (max entropy view)": 0.0004309561515096446,
      "VeMo (min entropy view)": 0.0001268547362415753,
      "VeMo (random view)": 0.0004309561515096446,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person goes down the mountain slowly from the ski mountain lodge."
  },
  "006718": {
    "text": "a person moves into a fighting stance.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6688379513150836,
      "Minus Multimodal Distance": -11.60043716430664,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00014926456788089126,
      "MoBERT-F": 0.5111974847674932,
      "MoBERT-N": 0.5812763381432571,
      "MoBERT-min(F/N)": 0.5111974847674932,
      "MoBERT-max(F/N)": 0.5812763381432571,
      "MotionCritic": -2.846564292907715,
      "VeMo (human-opt view)": 0.9859343603482921,
      "VeMo (max entropy view)": 0.9783869267264101,
      "VeMo (min entropy view)": 0.9859343603482921,
      "VeMo (random view)": 0.9859343603482921,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person moves into a fighting stance."
  },
  "009942": {
    "text": "the person is pouring some thing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.27457229515321263,
      "Minus Multimodal Distance": -5.206620693206787,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.5544262825860642e-05,
      "MoBERT-F": 0.37033350999355036,
      "MoBERT-N": 0.3959399378130504,
      "MoBERT-min(F/N)": 0.37033350999355036,
      "MoBERT-max(F/N)": 0.3959399378130504,
      "MotionCritic": -6.706456661224365,
      "VeMo (human-opt view)": 0.00013572781508604535,
      "VeMo (max entropy view)": 0.00013572781508604535,
      "VeMo (min entropy view)": 2.021079457199627e-05,
      "VeMo (random view)": 2.021079457199627e-05,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is pouring something."
  },
  "007742": {
    "text": "a person waves a friendly hello.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.22807895038125517,
      "Minus Multimodal Distance": -6.526411533355713,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.446491453156341e-05,
      "MoBERT-F": 0.4186246098089431,
      "MoBERT-N": 0.41311388235103014,
      "MoBERT-min(F/N)": 0.41311388235103014,
      "MoBERT-max(F/N)": 0.4186246098089431,
      "MotionCritic": -8.971197128295898,
      "VeMo (human-opt view)": 0.9755840042233074,
      "VeMo (max entropy view)": 0.9578107183580388,
      "VeMo (min entropy view)": 0.9755840042233074,
      "VeMo (random view)": 0.9578107183580388,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person waves a friendly hello."
  },
  "011569": {
    "text": "the person is on the seesaw.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3263695814350584,
      "Minus Multimodal Distance": -7.780457496643066,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.17459431290626526,
      "MoBERT-F": 0.7192824446626604,
      "MoBERT-N": 0.6150107999393427,
      "MoBERT-min(F/N)": 0.6150107999393427,
      "MoBERT-max(F/N)": 0.7192824446626604,
      "MotionCritic": -10.051557540893555,
      "VeMo (human-opt view)": 2.0144624689783496e-05,
      "VeMo (max entropy view)": 2.2828519847268923e-05,
      "VeMo (min entropy view)": 2.0144624689783496e-05,
      "VeMo (random view)": 2.0144624689783496e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is on the seesaw."
  },
  "000865": {
    "text": "the person is preforming a swimming stroke know as the butterfly stroke.  the arms swing from behind the head and reenter the water propelling the person forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.5444417592381667,
      "Minus Multimodal Distance": -6.280336380004883,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.054268745472655e-05,
      "MoBERT-F": 0.47968907831538143,
      "MoBERT-N": 0.526667144625444,
      "MoBERT-min(F/N)": 0.47968907831538143,
      "MoBERT-max(F/N)": 0.526667144625444,
      "MotionCritic": -9.921052932739258,
      "VeMo (human-opt view)": 0.8358714043993232,
      "VeMo (max entropy view)": 0.7772795216741405,
      "VeMo (min entropy view)": 0.8358714043993232,
      "VeMo (random view)": 0.7772795216741405,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is performing a swimming stroke known as the butterfly stroke. The arms swing from behind the head and re - enter the water, propelling the person forward."
  },
  "001003": {
    "text": "the person is walking forward using both their left and right hand for balance. their left and right hand are pressing down on something to help them walk. they put their right foot directly in front of the left and then the left directly in front of the right they stop walking and stand",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4582158367281304,
      "Minus Multimodal Distance": -8.598976135253906,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.05992471112404e-05,
      "MoBERT-F": 0.35937486419241593,
      "MoBERT-N": 0.4989202949328994,
      "MoBERT-min(F/N)": 0.35937486419241593,
      "MoBERT-max(F/N)": 0.4989202949328994,
      "MotionCritic": -3.08056640625,
      "VeMo (human-opt view)": 0.8086642599277978,
      "VeMo (max entropy view)": 0.80859375,
      "VeMo (min entropy view)": 0.8086642599277978,
      "VeMo (random view)": 0.8086642599277978,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is walking forward, using both their left and right hands for balance. Their left and right hands are pressing down on something to help them walk. They put their right foot directly in front of the left, and then the left directly in front of the right. They stop walking and stand."
  },
  "000824": {
    "text": "person is jogging and then gets down and walks like an ape and then gets back up and jogs again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9736233252165144,
      "Minus Multimodal Distance": -8.326926231384277,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.18671944737434387,
      "MoBERT-F": 0.4812268910567705,
      "MoBERT-N": 0.5042315050102273,
      "MoBERT-min(F/N)": 0.4812268910567705,
      "MoBERT-max(F/N)": 0.5042315050102273,
      "MotionCritic": -8.939309120178223,
      "VeMo (human-opt view)": 0.9397369226063016,
      "VeMo (max entropy view)": 0.9049338146811071,
      "VeMo (min entropy view)": 0.9397369226063016,
      "VeMo (random view)": 0.9049338146811071,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is jogging, then gets down and walks like an ape, then gets back up and jogs again."
  },
  "014171": {
    "text": "a walking man bumps something with his right leg then turns and walks in another direction.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1208756328543592,
      "Minus Multimodal Distance": -4.37261438369751,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.8710974220302887e-05,
      "MoBERT-F": 0.4118624256776557,
      "MoBERT-N": 0.4771932234294105,
      "MoBERT-min(F/N)": 0.4118624256776557,
      "MoBERT-max(F/N)": 0.4771932234294105,
      "MotionCritic": -6.559272766113281,
      "VeMo (human-opt view)": 0.9467084639498433,
      "VeMo (max entropy view)": 0.9044967880085653,
      "VeMo (min entropy view)": 0.9467084639498433,
      "VeMo (random view)": 0.9467084639498433,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A walking man bumps into something with his right leg, then turns and walks in another direction."
  },
  "000302": {
    "text": "a person goes into a ducking position like they are shielding themselves from something",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6106933175245922,
      "Minus Multimodal Distance": -7.696950435638428,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.172923970851116e-05,
      "MoBERT-F": 0.46958142255906316,
      "MoBERT-N": 0.4241477877551627,
      "MoBERT-min(F/N)": 0.4241477877551627,
      "MoBERT-max(F/N)": 0.46958142255906316,
      "MotionCritic": -7.5211052894592285,
      "VeMo (human-opt view)": 0.9707163237723382,
      "VeMo (max entropy view)": 0.9707163237723382,
      "VeMo (min entropy view)": 0.982010582010582,
      "VeMo (random view)": 0.982010582010582,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person assumes a ducking position as if they are shielding themselves from something."
  },
  "003858": {
    "text": "a person raises both hands and waves them in various ways.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.34336026716160567,
      "Minus Multimodal Distance": -5.9665021896362305,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.4496219339198433e-05,
      "MoBERT-F": 0.35207012379869423,
      "MoBERT-N": 0.3722912805594374,
      "MoBERT-min(F/N)": 0.35207012379869423,
      "MoBERT-max(F/N)": 0.3722912805594374,
      "MotionCritic": -13.613348960876465,
      "VeMo (human-opt view)": 0.9978206202849959,
      "VeMo (max entropy view)": 0.9970196748952137,
      "VeMo (min entropy view)": 0.9978206202849959,
      "VeMo (random view)": 0.9970196748952137,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person raises both hands and waves them in various ways."
  },
  "011717": {
    "text": "a ballerina is bringing the leg forward then taking a bow.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5738731264879778,
      "Minus Multimodal Distance": -10.919564247131348,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.5228767395019531,
      "MoBERT-F": 0.6704041057071999,
      "MoBERT-N": 0.6215452384881534,
      "MoBERT-min(F/N)": 0.6215452384881534,
      "MoBERT-max(F/N)": 0.6704041057071999,
      "MotionCritic": -2.1361498832702637,
      "VeMo (human-opt view)": 0.0055455836257936614,
      "VeMo (max entropy view)": 0.0055455836257936614,
      "VeMo (min entropy view)": 0.00011196931838929614,
      "VeMo (random view)": 0.00011196931838929614,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A ballerina is bringing the leg forward and then taking a bow."
  },
  "013568": {
    "text": "a person jumps repeatedly, waving their hands in circles.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.8287457219894491,
      "Minus Multimodal Distance": -9.080615997314453,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9884639382362366,
      "MoBERT-F": 0.8548297486246712,
      "MoBERT-N": 0.7563801924471469,
      "MoBERT-min(F/N)": 0.7563801924471469,
      "MoBERT-max(F/N)": 0.8548297486246712,
      "MotionCritic": -5.112032413482666,
      "VeMo (human-opt view)": 0.3071593533487298,
      "VeMo (max entropy view)": 0.3071593533487298,
      "VeMo (min entropy view)": 0.0568882796435915,
      "VeMo (random view)": 0.0568882796435915,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jumps repeatedly, waving their hands in circles."
  },
  "004344": {
    "text": "a person walks straifht forward with head bent forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4765828834811487,
      "Minus Multimodal Distance": -5.18336296081543,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4643366486998275e-05,
      "MoBERT-F": 0.3298478383449895,
      "MoBERT-N": 0.45484626339215123,
      "MoBERT-min(F/N)": 0.3298478383449895,
      "MoBERT-max(F/N)": 0.45484626339215123,
      "MotionCritic": -3.533937454223633,
      "VeMo (human-opt view)": 0.06361323155216285,
      "VeMo (max entropy view)": 0.15643906655142611,
      "VeMo (min entropy view)": 0.06361323155216285,
      "VeMo (random view)": 0.15643906655142611,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks straight forward with head bent forward."
  },
  "009072": {
    "text": "the person was flying around like a fly.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.6591533284823081,
      "Minus Multimodal Distance": -4.6243462562561035,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.966324220527895e-05,
      "MoBERT-F": 0.46329578633038215,
      "MoBERT-N": 0.5111261272920109,
      "MoBERT-min(F/N)": 0.46329578633038215,
      "MoBERT-max(F/N)": 0.5111261272920109,
      "MotionCritic": -6.883544445037842,
      "VeMo (human-opt view)": 0.0002975623292777928,
      "VeMo (max entropy view)": 0.00062613726222217,
      "VeMo (min entropy view)": 0.0002975623292777928,
      "VeMo (random view)": 0.0002975623292777928,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was flying around like a fly."
  },
  "011897": {
    "text": "a person kneeling on the ground gets up.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7060113932483904,
      "Minus Multimodal Distance": -10.739237785339355,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.1807744950056076,
      "MoBERT-F": 0.5528256338036206,
      "MoBERT-N": 0.5278594882675288,
      "MoBERT-min(F/N)": 0.5278594882675288,
      "MoBERT-max(F/N)": 0.5528256338036206,
      "MotionCritic": -2.286801815032959,
      "VeMo (human-opt view)": 0.9995119570522206,
      "VeMo (max entropy view)": 0.9994122696720464,
      "VeMo (min entropy view)": 0.9995119570522206,
      "VeMo (random view)": 0.9995119570522206,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person kneeling on the ground gets up."
  },
  "011340": {
    "text": "the person was pushed but stayed standing.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.27613024048833895,
      "Minus Multimodal Distance": -2.9236574172973633,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.743362529145088e-05,
      "MoBERT-F": 0.31536515201512844,
      "MoBERT-N": 0.3977885267537217,
      "MoBERT-min(F/N)": 0.31536515201512844,
      "MoBERT-max(F/N)": 0.3977885267537217,
      "MotionCritic": -0.9195412993431091,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.5157232704402516,
      "VeMo (random view)": 0.5157232704402516,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was pushed but stayed standing."
  },
  "007516": {
    "text": "standing on one leg and hopping.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5944035385175761,
      "Minus Multimodal Distance": -6.065024375915527,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0007619881071150303,
      "MoBERT-F": 0.575640676707293,
      "MoBERT-N": 0.6821573898786261,
      "MoBERT-min(F/N)": 0.575640676707293,
      "MoBERT-max(F/N)": 0.6821573898786261,
      "MotionCritic": -10.558490753173828,
      "VeMo (human-opt view)": 0.9919363284113519,
      "VeMo (max entropy view)": 0.9840955586352538,
      "VeMo (min entropy view)": 0.9919363284113519,
      "VeMo (random view)": 0.9840955586352538,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing on one leg and hopping."
  },
  "011210": {
    "text": "person is walking wobbling forward.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2694405582773114,
      "Minus Multimodal Distance": -4.239401817321777,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.5228522645193152e-05,
      "MoBERT-F": 0.4510873231673717,
      "MoBERT-N": 0.531873247969002,
      "MoBERT-min(F/N)": 0.4510873231673717,
      "MoBERT-max(F/N)": 0.531873247969002,
      "MotionCritic": -2.9617443084716797,
      "VeMo (human-opt view)": 0.9466972711043101,
      "VeMo (max entropy view)": 0.9197461739455021,
      "VeMo (min entropy view)": 0.9466972711043101,
      "VeMo (random view)": 0.9466972711043101,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking wobblingly forward."
  },
  "011224": {
    "text": "running forward and stopping.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5943731325509031,
      "Minus Multimodal Distance": -6.516633033752441,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.03991221264004707,
      "MoBERT-F": 0.6731326558051971,
      "MoBERT-N": 0.5801387704018824,
      "MoBERT-min(F/N)": 0.5801387704018824,
      "MoBERT-max(F/N)": 0.6731326558051971,
      "MotionCritic": -6.650367259979248,
      "VeMo (human-opt view)": 0.9966158621918018,
      "VeMo (max entropy view)": 0.9902984158172664,
      "VeMo (min entropy view)": 0.9966158621918018,
      "VeMo (random view)": 0.9966158621918018,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs forward and stops."
  },
  "005197": {
    "text": "a person lowers their arms to their sides, then stretches them out straight in front of them and parallel to the ground before returning them to their sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.13931886081506836,
      "Minus Multimodal Distance": -5.476069450378418,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.211463237879798e-05,
      "MoBERT-F": 0.32362332271331545,
      "MoBERT-N": 0.4549981568685944,
      "MoBERT-min(F/N)": 0.32362332271331545,
      "MoBERT-max(F/N)": 0.4549981568685944,
      "MotionCritic": -7.370824337005615,
      "VeMo (human-opt view)": 0.7053254437869823,
      "VeMo (max entropy view)": 0.34831460674157305,
      "VeMo (min entropy view)": 0.7053254437869823,
      "VeMo (random view)": 0.7053254437869823,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person lowers their arms to their sides, then stretches them out straight in front of themselves and parallel to the ground before returning them to their sides."
  },
  "002542": {
    "text": "a man leans forward to pick up an object slightly to his left, and places it down slightly to his right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.11228286797955216,
      "Minus Multimodal Distance": -3.112082004547119,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.1981202482711524e-05,
      "MoBERT-F": 0.31354207359988184,
      "MoBERT-N": 0.4682285024216477,
      "MoBERT-min(F/N)": 0.31354207359988184,
      "MoBERT-max(F/N)": 0.4682285024216477,
      "MotionCritic": -1.4150539636611938,
      "VeMo (human-opt view)": 0.9241379310344827,
      "VeMo (max entropy view)": 0.7318007662835249,
      "VeMo (min entropy view)": 0.9241379310344827,
      "VeMo (random view)": 0.9241379310344827,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man leans forward to pick up an object slightly to his left and places it down slightly to his right."
  },
  "010032": {
    "text": "a person walks forward and jumps over an object, then turns around to jump over it again and walk back.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8961305413379237,
      "Minus Multimodal Distance": -7.302992820739746,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.8932990431785583,
      "MoBERT-F": 0.5563433847456105,
      "MoBERT-N": 0.725852176165336,
      "MoBERT-min(F/N)": 0.5563433847456105,
      "MoBERT-max(F/N)": 0.725852176165336,
      "MotionCritic": -9.179388999938965,
      "VeMo (human-opt view)": 0.00856297680211739,
      "VeMo (max entropy view)": 0.00856297680211739,
      "VeMo (min entropy view)": 0.0033656502737730446,
      "VeMo (random view)": 0.0033656502737730446,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward and jumps over an object. Then, they turn around to jump over it again and walk back."
  },
  "005399": {
    "text": "a man stands on the ground and lifts his right hand for a moment above the shoulder and then puts it down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.2550345624652953,
      "Minus Multimodal Distance": -6.5175957679748535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 8.53479650686495e-05,
      "MoBERT-F": 0.25697594298111487,
      "MoBERT-N": 0.3733533518841245,
      "MoBERT-min(F/N)": 0.25697594298111487,
      "MoBERT-max(F/N)": 0.3733533518841245,
      "MotionCritic": -4.637375354766846,
      "VeMo (human-opt view)": 0.9883105697059186,
      "VeMo (max entropy view)": 0.9604943721032885,
      "VeMo (min entropy view)": 0.9883105697059186,
      "VeMo (random view)": 0.9604943721032885,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man stands on the ground, lifts his right hand above his shoulder for a moment, and then puts it down."
  },
  "013154": {
    "text": "a man steps forward, then brushes something at knee height with his right arm, then steps back and stretches his arms out to either side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5265680055810552,
      "Minus Multimodal Distance": -10.624839782714844,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.4137141483370215e-05,
      "MoBERT-F": 0.2987816324549938,
      "MoBERT-N": 0.4253056016314471,
      "MoBERT-min(F/N)": 0.2987816324549938,
      "MoBERT-max(F/N)": 0.4253056016314471,
      "MotionCritic": -9.7384033203125,
      "VeMo (human-opt view)": 0.9896500739280434,
      "VeMo (max entropy view)": 0.9526488513830286,
      "VeMo (min entropy view)": 0.9896500739280434,
      "VeMo (random view)": 0.9896500739280434,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man steps forward, then brushes something at knee height with his right arm. Then he steps back and stretches his arms out to either side."
  },
  "011682": {
    "text": "a man stands prepared and aware, then suddenly takes an abrupt step backwards and regains his ready stance.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.1951760726446078,
      "Minus Multimodal Distance": -7.784367561340332,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00012784168939106166,
      "MoBERT-F": 0.47713818046572376,
      "MoBERT-N": 0.5009663123872642,
      "MoBERT-min(F/N)": 0.47713818046572376,
      "MoBERT-max(F/N)": 0.5009663123872642,
      "MotionCritic": -6.4387383460998535,
      "VeMo (human-opt view)": 0.8080808080808081,
      "VeMo (max entropy view)": 0.7777777777777778,
      "VeMo (min entropy view)": 0.8080808080808081,
      "VeMo (random view)": 0.7777777777777778,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man stands prepared and aware. Then, suddenly, he takes an abrupt step backwards and regains his ready stance."
  },
  "013952": {
    "text": "walking forward then slightly bending down at waist and touching a surface from right to left",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5084741661202485,
      "Minus Multimodal Distance": -5.416079998016357,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.6517738660913892e-05,
      "MoBERT-F": 0.3673944558044453,
      "MoBERT-N": 0.4710768584372518,
      "MoBERT-min(F/N)": 0.3673944558044453,
      "MoBERT-max(F/N)": 0.4710768584372518,
      "MotionCritic": -6.933313369750977,
      "VeMo (human-opt view)": 0.6073131955484896,
      "VeMo (max entropy view)": 0.6073131955484896,
      "VeMo (min entropy view)": 0.826722338204593,
      "VeMo (random view)": 0.826722338204593,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking forward, then slightly bending down at the waist and touching a surface from right to left."
  },
  "008139": {
    "text": "the sim appears to be using their left hand to wipe or wash a window.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.6725634678997344,
      "Minus Multimodal Distance": -5.332784652709961,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.0503862692276016e-05,
      "MoBERT-F": 0.3042931916167043,
      "MoBERT-N": 0.3532354019304932,
      "MoBERT-min(F/N)": 0.3042931916167043,
      "MoBERT-max(F/N)": 0.3532354019304932,
      "MotionCritic": -5.647254467010498,
      "VeMo (human-opt view)": 0.4222222222222222,
      "VeMo (max entropy view)": 0.4222222222222222,
      "VeMo (min entropy view)": 0.7422680412371134,
      "VeMo (random view)": 0.4222222222222222,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The sim appears to be using their left hand to wipe or wash a window."
  },
  "014361": {
    "text": "the person is getting ready to place the football for a kick off.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8779365387505113,
      "Minus Multimodal Distance": -2.9827487468719482,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.22239074110984802,
      "MoBERT-F": 0.6262815808718643,
      "MoBERT-N": 0.5840932223356268,
      "MoBERT-min(F/N)": 0.5840932223356268,
      "MoBERT-max(F/N)": 0.6262815808718643,
      "MotionCritic": -0.2880472242832184,
      "VeMo (human-opt view)": 0.0674206176598521,
      "VeMo (max entropy view)": 0.22340425531914893,
      "VeMo (min entropy view)": 0.0674206176598521,
      "VeMo (random view)": 0.0674206176598521,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is getting ready to place the football for a kick - off."
  },
  "000324": {
    "text": "a person bounce sup and down on the balls of their feet.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.37665467159605054,
      "Minus Multimodal Distance": -5.419960975646973,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.566765215306077e-05,
      "MoBERT-F": 0.43229772884613493,
      "MoBERT-N": 0.5567030642064938,
      "MoBERT-min(F/N)": 0.43229772884613493,
      "MoBERT-max(F/N)": 0.5567030642064938,
      "MotionCritic": -3.582512855529785,
      "VeMo (human-opt view)": 0.9149560117302052,
      "VeMo (max entropy view)": 0.7439198855507868,
      "VeMo (min entropy view)": 0.9149560117302052,
      "VeMo (random view)": 0.9149560117302052,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person bounces up and down on the balls of their feet."
  },
  "007675": {
    "text": "person is in t stance, brings arms in, then returns to t",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5955462866408187,
      "Minus Multimodal Distance": -6.025632858276367,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.492112798790913e-05,
      "MoBERT-F": 0.3667274500542298,
      "MoBERT-N": 0.4625869595643043,
      "MoBERT-min(F/N)": 0.3667274500542298,
      "MoBERT-max(F/N)": 0.4625869595643043,
      "MotionCritic": -7.82952356338501,
      "VeMo (human-opt view)": 0.835881753312946,
      "VeMo (max entropy view)": 0.731006160164271,
      "VeMo (min entropy view)": 0.835881753312946,
      "VeMo (random view)": 0.835881753312946,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is in a T stance, brings the arms in, then returns to the T stance."
  },
  "003062": {
    "text": "a person is standing still while waving his right hand.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.19485290005693873,
      "Minus Multimodal Distance": -3.5365116596221924,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 4.125419945921749e-05,
      "MoBERT-F": 0.3182257743916732,
      "MoBERT-N": 0.38933301470237663,
      "MoBERT-min(F/N)": 0.3182257743916732,
      "MoBERT-max(F/N)": 0.38933301470237663,
      "MotionCritic": -5.170836448669434,
      "VeMo (human-opt view)": 0.9399409255004922,
      "VeMo (max entropy view)": 0.9399409255004922,
      "VeMo (min entropy view)": 0.9525514771709938,
      "VeMo (random view)": 0.9525514771709938,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is standing still while waving his right hand."
  },
  "013745": {
    "text": "the person was holding the right side of his head.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.26630517415887706,
      "Minus Multimodal Distance": -8.927878379821777,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.728513911482878e-05,
      "MoBERT-F": 0.32475564752195296,
      "MoBERT-N": 0.36756375762060584,
      "MoBERT-min(F/N)": 0.32475564752195296,
      "MoBERT-max(F/N)": 0.36756375762060584,
      "MotionCritic": -2.36641788482666,
      "VeMo (human-opt view)": 0.7553191489361702,
      "VeMo (max entropy view)": 0.7553191489361702,
      "VeMo (min entropy view)": 0.9601820250284414,
      "VeMo (random view)": 0.7553191489361702,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was holding the right side of his head."
  },
  "002470": {
    "text": "a person walks clockwise from the 3 o'clock position to the 11 o'clock position.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6404040659590896,
      "Minus Multimodal Distance": -7.636926174163818,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.46917784214019775,
      "MoBERT-F": 0.5427997610733264,
      "MoBERT-N": 0.4847123112442992,
      "MoBERT-min(F/N)": 0.4847123112442992,
      "MoBERT-max(F/N)": 0.5427997610733264,
      "MotionCritic": -9.203838348388672,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.4067796610169492,
      "VeMo (random view)": 0.5,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks clockwise from the 3 o'clock position to the 11 o'clock position."
  },
  "011074": {
    "text": "a person walks in a counter clockwise circle.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5710547578758411,
      "Minus Multimodal Distance": -7.336588382720947,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 9.443919407203794e-05,
      "MoBERT-F": 0.4598263389624831,
      "MoBERT-N": 0.6255711213708164,
      "MoBERT-min(F/N)": 0.4598263389624831,
      "MoBERT-max(F/N)": 0.6255711213708164,
      "MotionCritic": -10.960184097290039,
      "VeMo (human-opt view)": 0.7775891341256367,
      "VeMo (max entropy view)": 0.7664884135472371,
      "VeMo (min entropy view)": 0.7775891341256367,
      "VeMo (random view)": 0.7664884135472371,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks in a counter - clockwise circle."
  },
  "004808": {
    "text": "a person walks, speeds up, and jumps.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0866179579222028,
      "Minus Multimodal Distance": -5.936439037322998,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.008102351799607277,
      "MoBERT-F": 0.4996171536099684,
      "MoBERT-N": 0.6675933622942799,
      "MoBERT-min(F/N)": 0.4996171536099684,
      "MoBERT-max(F/N)": 0.6675933622942799,
      "MotionCritic": -1.953252911567688,
      "VeMo (human-opt view)": 0.7432150313152401,
      "VeMo (max entropy view)": 0.7432150313152401,
      "VeMo (min entropy view)": 0.9495733126454616,
      "VeMo (random view)": 0.9495733126454616,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks, speeds up, and jumps."
  },
  "002876": {
    "text": "the person is moving something around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7658093862679795,
      "Minus Multimodal Distance": -7.125816345214844,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.105331416008994e-05,
      "MoBERT-F": 0.46133463041569245,
      "MoBERT-N": 0.486054744352895,
      "MoBERT-min(F/N)": 0.46133463041569245,
      "MoBERT-max(F/N)": 0.486054744352895,
      "MotionCritic": -5.145108699798584,
      "VeMo (human-opt view)": 0.8932515337423312,
      "VeMo (max entropy view)": 0.7770700636942676,
      "VeMo (min entropy view)": 0.8932515337423312,
      "VeMo (random view)": 0.7770700636942676,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is moving something around."
  },
  "004854": {
    "text": "the person is walking around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.222935784088881,
      "Minus Multimodal Distance": -8.237478256225586,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9776200652122498,
      "MoBERT-F": 0.7971900847243278,
      "MoBERT-N": 0.6471961096094884,
      "MoBERT-min(F/N)": 0.6471961096094884,
      "MoBERT-max(F/N)": 0.7971900847243278,
      "MotionCritic": -23.541885375976562,
      "VeMo (human-opt view)": 0.9914172074523759,
      "VeMo (max entropy view)": 0.9914172074523759,
      "VeMo (min entropy view)": 0.9919429239190602,
      "VeMo (random view)": 0.9914172074523759,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is walking around."
  },
  "004270": {
    "text": "person is reaching down.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6940464906261137,
      "Minus Multimodal Distance": -5.532825469970703,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0003875195689033717,
      "MoBERT-F": 0.5716510955997572,
      "MoBERT-N": 0.5858179325842677,
      "MoBERT-min(F/N)": 0.5716510955997572,
      "MoBERT-max(F/N)": 0.5858179325842677,
      "MotionCritic": -2.02126407623291,
      "VeMo (human-opt view)": 0.9947933863307342,
      "VeMo (max entropy view)": 0.9850567842199641,
      "VeMo (min entropy view)": 0.9947933863307342,
      "VeMo (random view)": 0.9947933863307342,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is reaching down."
  },
  "000750": {
    "text": "a man postures his arms like holding a dance partner and dances the waltz from the left hand side to the right hand side.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4447279954978837,
      "Minus Multimodal Distance": -3.0417680740356445,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.1425937777385116e-05,
      "MoBERT-F": 0.4216992446792641,
      "MoBERT-N": 0.5153127653057376,
      "MoBERT-min(F/N)": 0.4216992446792641,
      "MoBERT-max(F/N)": 0.5153127653057376,
      "MotionCritic": -4.449211120605469,
      "VeMo (human-opt view)": 0.7195767195767195,
      "VeMo (max entropy view)": 0.7195767195767195,
      "VeMo (min entropy view)": 0.7777777777777778,
      "VeMo (random view)": 0.7777777777777778,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man positions his arms as if holding a dance partner and waltzes from the left side to the right side."
  },
  "002544": {
    "text": "a person lifts up their arm at a 120 degree angle twice and then reverts their arm to the opposite lower part of their body.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7260384420305075,
      "Minus Multimodal Distance": -4.786994457244873,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4276754629681818e-05,
      "MoBERT-F": 0.41204830104942763,
      "MoBERT-N": 0.4278169919531571,
      "MoBERT-min(F/N)": 0.41204830104942763,
      "MoBERT-max(F/N)": 0.4278169919531571,
      "MotionCritic": -8.83387565612793,
      "VeMo (human-opt view)": 0.7194570135746606,
      "VeMo (max entropy view)": 0.7194570135746606,
      "VeMo (min entropy view)": 0.8083333333333333,
      "VeMo (random view)": 0.7194570135746606,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lifts their arm up at a 120 - degree angle twice and then reverts their arm to the opposite lower part of their body."
  },
  "012577": {
    "text": "a person pick up something with his left hand and with his right hand he is patting it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.37495430361925736,
      "Minus Multimodal Distance": -2.719425678253174,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 5.229314774624072e-05,
      "MoBERT-F": 0.2772026575065537,
      "MoBERT-N": 0.3822837437964062,
      "MoBERT-min(F/N)": 0.2772026575065537,
      "MoBERT-max(F/N)": 0.3822837437964062,
      "MotionCritic": -2.7067832946777344,
      "VeMo (human-opt view)": 0.4696629213483146,
      "VeMo (max entropy view)": 0.4696629213483146,
      "VeMo (min entropy view)": 0.34870317002881845,
      "VeMo (random view)": 0.34870317002881845,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person picks up something with his left hand and pats it with his right hand."
  },
  "001278": {
    "text": "a person raises their left arm to their face pensively while shifting their weight from side to side.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.3472129810178997,
      "Minus Multimodal Distance": -7.2536492347717285,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.054158150916919e-05,
      "MoBERT-F": 0.3073656845126107,
      "MoBERT-N": 0.4152042616518978,
      "MoBERT-min(F/N)": 0.3073656845126107,
      "MoBERT-max(F/N)": 0.4152042616518978,
      "MotionCritic": -10.63983154296875,
      "VeMo (human-opt view)": 0.909952606635071,
      "VeMo (max entropy view)": 0.909952606635071,
      "VeMo (min entropy view)": 0.9399293286219081,
      "VeMo (random view)": 0.9399293286219081,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person raises their left arm to their face pensively while shifting their weight from side to side."
  },
  "011213": {
    "text": "he stands and wobbles slightly, as if trying to stay steady in the wind",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.2754538018679941,
      "Minus Multimodal Distance": -11.150105476379395,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00012989847164135426,
      "MoBERT-F": 0.3305532868570398,
      "MoBERT-N": 0.33398031479462975,
      "MoBERT-min(F/N)": 0.3305532868570398,
      "MoBERT-max(F/N)": 0.33398031479462975,
      "MotionCritic": -3.4442715644836426,
      "VeMo (human-opt view)": 0.11901504787961696,
      "VeMo (max entropy view)": 0.2227204783258595,
      "VeMo (min entropy view)": 0.11901504787961696,
      "VeMo (random view)": 0.11901504787961696,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "He stands and wobbles slightly, as if trying to stay steady in the wind."
  },
  "006212": {
    "text": "person walks forward in a semi-straight line with arms extended out to the sides.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5728029307371627,
      "Minus Multimodal Distance": -8.712570190429688,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 5.488660099217668e-05,
      "MoBERT-F": 0.37737517172458535,
      "MoBERT-N": 0.40309054295844254,
      "MoBERT-min(F/N)": 0.37737517172458535,
      "MoBERT-max(F/N)": 0.40309054295844254,
      "MotionCritic": -6.016685962677002,
      "VeMo (human-opt view)": 0.99371746544606,
      "VeMo (max entropy view)": 0.9820419863417924,
      "VeMo (min entropy view)": 0.99371746544606,
      "VeMo (random view)": 0.99371746544606,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward in a semi - straight line with their arms extended out to the sides."
  },
  "005628": {
    "text": "someone walks with difficulty on their right side, then tries to run",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1719821457553152,
      "Minus Multimodal Distance": -4.566475868225098,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.1065809577703476,
      "MoBERT-F": 0.4759234100466512,
      "MoBERT-N": 0.5697287070187154,
      "MoBERT-min(F/N)": 0.4759234100466512,
      "MoBERT-max(F/N)": 0.5697287070187154,
      "MotionCritic": -4.663061618804932,
      "VeMo (human-opt view)": 0.8590021691973969,
      "VeMo (max entropy view)": 0.7544783983140148,
      "VeMo (min entropy view)": 0.8590021691973969,
      "VeMo (random view)": 0.7544783983140148,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "Someone walks with difficulty on their right side and then tries to run."
  },
  "013654": {
    "text": "the person looks as if they are strumming a guitar",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.18890867760629787,
      "Minus Multimodal Distance": -3.6641685962677,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 4.064315362484194e-05,
      "MoBERT-F": 0.3100467674223285,
      "MoBERT-N": 0.3753466213920513,
      "MoBERT-min(F/N)": 0.3100467674223285,
      "MoBERT-max(F/N)": 0.3753466213920513,
      "MotionCritic": -6.2033257484436035,
      "VeMo (human-opt view)": 0.36245954692556637,
      "VeMo (max entropy view)": 0.36245954692556637,
      "VeMo (min entropy view)": 0.06362545018007203,
      "VeMo (random view)": 0.36245954692556637,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person looks as if they are strumming a guitar."
  },
  "004719": {
    "text": "a person runs forward and then to the right.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.1252154725212475,
      "Minus Multimodal Distance": -7.514461040496826,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0002210609382018447,
      "MoBERT-F": 0.49929618035050416,
      "MoBERT-N": 0.6093487062016225,
      "MoBERT-min(F/N)": 0.49929618035050416,
      "MoBERT-max(F/N)": 0.6093487062016225,
      "MotionCritic": -2.4566755294799805,
      "VeMo (human-opt view)": 0.9553361542078044,
      "VeMo (max entropy view)": 0.9197475202885482,
      "VeMo (min entropy view)": 0.9553361542078044,
      "VeMo (random view)": 0.9553361542078044,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person runs forward and then to the right."
  },
  "008998": {
    "text": "the person did a kick spin to the right",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.2030202764102154,
      "Minus Multimodal Distance": -10.093135833740234,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.2883177482290193e-05,
      "MoBERT-F": 0.44994723261986014,
      "MoBERT-N": 0.5575277996396675,
      "MoBERT-min(F/N)": 0.44994723261986014,
      "MoBERT-max(F/N)": 0.5575277996396675,
      "MotionCritic": -7.363291263580322,
      "VeMo (human-opt view)": 0.48491879350348027,
      "VeMo (max entropy view)": 0.48491879350348027,
      "VeMo (min entropy view)": 0.23387872954764197,
      "VeMo (random view)": 0.23387872954764197,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person did a kick spin to the right."
  },
  "008216": {
    "text": "a man jumps down from a curve.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5909764285778087,
      "Minus Multimodal Distance": -4.785655975341797,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.008289273828268051,
      "MoBERT-F": 0.6604091602619626,
      "MoBERT-N": 0.6501130274166712,
      "MoBERT-min(F/N)": 0.6501130274166712,
      "MoBERT-max(F/N)": 0.6604091602619626,
      "MotionCritic": -4.233855724334717,
      "VeMo (human-opt view)": 0.00020384183431683772,
      "VeMo (max entropy view)": 0.00020384183431683772,
      "VeMo (min entropy view)": 2.4325014031110365e-05,
      "VeMo (random view)": 0.00020384183431683772,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A man jumps down from a curve."
  },
  "001218": {
    "text": "a man walks in a curved line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.670835128238036,
      "Minus Multimodal Distance": -7.15676212310791,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.385063246241771e-05,
      "MoBERT-F": 0.4017656997710116,
      "MoBERT-N": 0.47155527185487706,
      "MoBERT-min(F/N)": 0.4017656997710116,
      "MoBERT-max(F/N)": 0.47155527185487706,
      "MotionCritic": -0.6457417607307434,
      "VeMo (human-opt view)": 0.7653213751868461,
      "VeMo (max entropy view)": 0.7653213751868461,
      "VeMo (min entropy view)": 0.7879616963064295,
      "VeMo (random view)": 0.7879616963064295,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man walks in a curved line."
  },
  "002799": {
    "text": "a person stretching their right arm",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.40674088459676233,
      "Minus Multimodal Distance": -6.234210968017578,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.283163459855132e-05,
      "MoBERT-F": 0.3640999215159659,
      "MoBERT-N": 0.40379546309413333,
      "MoBERT-min(F/N)": 0.3640999215159659,
      "MoBERT-max(F/N)": 0.40379546309413333,
      "MotionCritic": -8.15965461730957,
      "VeMo (human-opt view)": 0.979678015307469,
      "VeMo (max entropy view)": 0.979678015307469,
      "VeMo (min entropy view)": 0.9860041228165347,
      "VeMo (random view)": 0.979678015307469,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is stretching their right arm."
  },
  "011184": {
    "text": "the person is running forward.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.353216283753914,
      "Minus Multimodal Distance": -6.382256507873535,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.73499341448769e-05,
      "MoBERT-F": 0.5943269153361699,
      "MoBERT-N": 0.5476298779210431,
      "MoBERT-min(F/N)": 0.5476298779210431,
      "MoBERT-max(F/N)": 0.5943269153361699,
      "MotionCritic": -7.25879430770874,
      "VeMo (human-opt view)": 0.9434467971053337,
      "VeMo (max entropy view)": 0.9149736644093304,
      "VeMo (min entropy view)": 0.9434467971053337,
      "VeMo (random view)": 0.9434467971053337,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person is running forward."
  },
  "003005": {
    "text": "he does a salsa dance",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.8944151203669749,
      "Minus Multimodal Distance": -10.17858600616455,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.8235391974449158,
      "MoBERT-F": 0.7090188012995888,
      "MoBERT-N": 0.7166265337654645,
      "MoBERT-min(F/N)": 0.7090188012995888,
      "MoBERT-max(F/N)": 0.7166265337654645,
      "MotionCritic": -10.43474006652832,
      "VeMo (human-opt view)": 0.5,
      "VeMo (max entropy view)": 0.5,
      "VeMo (min entropy view)": 0.18206278026905828,
      "VeMo (random view)": 0.5,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He does a salsa dance."
  },
  "013736": {
    "text": "walking forward while waving.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5567119930158505,
      "Minus Multimodal Distance": -6.942802906036377,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0209842249751091,
      "MoBERT-F": 0.6129749793084374,
      "MoBERT-N": 0.6274843641065124,
      "MoBERT-min(F/N)": 0.6129749793084374,
      "MoBERT-max(F/N)": 0.6274843641065124,
      "MotionCritic": -2.7106940746307373,
      "VeMo (human-opt view)": 0.06754966887417219,
      "VeMo (max entropy view)": 0.07611548556430446,
      "VeMo (min entropy view)": 0.06754966887417219,
      "VeMo (random view)": 0.07611548556430446,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is walking forward while waving."
  },
  "013811": {
    "text": "a person is sitting with both arms out in front of themselves holding something, then they move their right hand away and make sliding motions with it.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4506450677387242,
      "Minus Multimodal Distance": -9.962332725524902,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.036804890958592e-05,
      "MoBERT-F": 0.32445148159260995,
      "MoBERT-N": 0.41690849236820254,
      "MoBERT-min(F/N)": 0.32445148159260995,
      "MoBERT-max(F/N)": 0.41690849236820254,
      "MotionCritic": -6.094811916351318,
      "VeMo (human-opt view)": 0.1482889733840304,
      "VeMo (max entropy view)": 0.1482889733840304,
      "VeMo (min entropy view)": 0.09508196721311475,
      "VeMo (random view)": 0.1482889733840304,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is sitting with both arms extended in front of them, holding something. Then, they move their right hand away and make sliding motions with it."
  },
  "003859": {
    "text": "person is walking forwards quite fast, then squats down to pick something up to then turn around and walk fast again. appears to be in a rush and moving an item",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.93798580693349,
      "Minus Multimodal Distance": -7.187711715698242,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.003794711781665683,
      "MoBERT-F": 0.46353003754275357,
      "MoBERT-N": 0.5019667160164734,
      "MoBERT-min(F/N)": 0.46353003754275357,
      "MoBERT-max(F/N)": 0.5019667160164734,
      "MotionCritic": -7.22167444229126,
      "VeMo (human-opt view)": 0.765399737876802,
      "VeMo (max entropy view)": 0.765399737876802,
      "VeMo (min entropy view)": 0.766743648960739,
      "VeMo (random view)": 0.765399737876802,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is walking forward quite fast, then squats down to pick something up. After that, they turn around and walk fast again. They appear to be in a rush and moving an item."
  },
  "006687": {
    "text": "a person raises their left hand above their head and motions downward. as if throwing an object toward the ground",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5707573458608574,
      "Minus Multimodal Distance": -4.781579494476318,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.00037262754631228745,
      "MoBERT-F": 0.49538425940145625,
      "MoBERT-N": 0.5928123542139733,
      "MoBERT-min(F/N)": 0.49538425940145625,
      "MoBERT-max(F/N)": 0.5928123542139733,
      "MotionCritic": -6.337364673614502,
      "VeMo (human-opt view)": 0.9402004839267196,
      "VeMo (max entropy view)": 0.9402004839267196,
      "VeMo (min entropy view)": 0.9467455621301775,
      "VeMo (random view)": 0.9402004839267196,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person raises their left hand above their head and motions downward, as if throwing an object toward the ground."
  },
  "010254": {
    "text": "he balances on his right foot and similarly flaps his arms to that of a large bird taking off",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4436532691595382,
      "Minus Multimodal Distance": -3.680177927017212,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0012566610239446163,
      "MoBERT-F": 0.581033557929937,
      "MoBERT-N": 0.5397428414145738,
      "MoBERT-min(F/N)": 0.5397428414145738,
      "MoBERT-max(F/N)": 0.581033557929937,
      "MotionCritic": -13.496979713439941,
      "VeMo (human-opt view)": 0.9243840271877655,
      "VeMo (max entropy view)": 0.9196956889264581,
      "VeMo (min entropy view)": 0.9243840271877655,
      "VeMo (random view)": 0.9243840271877655,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "He balances on his right foot and flaps his arms in a way similar to that of a large bird taking off"
  },
  "004192": {
    "text": "person moves forward to their left side and picks something up and does a full turn back",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9141472545624069,
      "Minus Multimodal Distance": -8.198640823364258,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.33987706899642944,
      "MoBERT-F": 0.6170533836415582,
      "MoBERT-N": 0.6532697456005567,
      "MoBERT-min(F/N)": 0.6170533836415582,
      "MoBERT-max(F/N)": 0.6532697456005567,
      "MotionCritic": -4.848510265350342,
      "VeMo (human-opt view)": 0.5925925925925926,
      "VeMo (max entropy view)": 0.5925925925925926,
      "VeMo (min entropy view)": 0.5935251798561151,
      "VeMo (random view)": 0.5925925925925926,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person moves forward to their left side, picks something up, and does a full turn backward."
  },
  "000742": {
    "text": "person stretching the whole right side by lifting the right arm and leaning sideways to the left",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7574718546686199,
      "Minus Multimodal Distance": -3.7941014766693115,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.5271921660751104e-05,
      "MoBERT-F": 0.3424701724925828,
      "MoBERT-N": 0.38353516063087695,
      "MoBERT-min(F/N)": 0.3424701724925828,
      "MoBERT-max(F/N)": 0.38353516063087695,
      "MotionCritic": -10.478860855102539,
      "VeMo (human-opt view)": 0.9808871258564731,
      "VeMo (max entropy view)": 0.9808871258564731,
      "VeMo (min entropy view)": 0.988306703561546,
      "VeMo (random view)": 0.9808871258564731,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person is stretching the entire right side of their body by lifting their right arm and leaning sideways to the left."
  },
  "013160": {
    "text": "the sim appears to walk forward bend slightly grabbing an object with their left hand.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5958732779177088,
      "Minus Multimodal Distance": -4.435187816619873,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 3.477361678960733e-05,
      "MoBERT-F": 0.3196118795695093,
      "MoBERT-N": 0.44904115877562867,
      "MoBERT-min(F/N)": 0.3196118795695093,
      "MoBERT-max(F/N)": 0.44904115877562867,
      "MotionCritic": -2.239952325820923,
      "VeMo (human-opt view)": 0.0231023102310231,
      "VeMo (max entropy view)": 0.08051082731815658,
      "VeMo (min entropy view)": 0.0231023102310231,
      "VeMo (random view)": 0.0231023102310231,
      "User-1 Score": 1.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The Sim appears to walk forward, bend slightly, and grab an object with their left hand."
  },
  "014310": {
    "text": "a man is walking forward stumbles and goes to the ground and get back up.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7888530719851186,
      "Minus Multimodal Distance": -2.3705008029937744,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.9580336809158325,
      "MoBERT-F": 0.547751776671505,
      "MoBERT-N": 0.41647769544852614,
      "MoBERT-min(F/N)": 0.41647769544852614,
      "MoBERT-max(F/N)": 0.547751776671505,
      "MotionCritic": -6.141049861907959,
      "VeMo (human-opt view)": 0.9820971867007673,
      "VeMo (max entropy view)": 0.9688206055128784,
      "VeMo (min entropy view)": 0.9820971867007673,
      "VeMo (random view)": 0.9688206055128784,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man is walking forward, stumbles, goes to the ground and gets back up."
  },
  "008668": {
    "text": "a person waving one hands in a horizontal circular motion in front of them",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.214726925532866,
      "Minus Multimodal Distance": -7.268898010253906,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.601480057113804e-05,
      "MoBERT-F": 0.3284739443900225,
      "MoBERT-N": 0.37378616891597694,
      "MoBERT-min(F/N)": 0.3284739443900225,
      "MoBERT-max(F/N)": 0.37378616891597694,
      "MotionCritic": -9.301233291625977,
      "VeMo (human-opt view)": 0.42346938775510207,
      "VeMo (max entropy view)": 0.42346938775510207,
      "VeMo (min entropy view)": 0.6652452025586354,
      "VeMo (random view)": 0.6652452025586354,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is waving one hand in a horizontal circular motion in front of them."
  },
  "008782": {
    "text": "a person walks forwards stiffly with both arms out in front of them.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.4806044001962365,
      "Minus Multimodal Distance": -2.3440375328063965,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.4577160729677416e-05,
      "MoBERT-F": 0.4489515790833025,
      "MoBERT-N": 0.5249865721330149,
      "MoBERT-min(F/N)": 0.4489515790833025,
      "MoBERT-max(F/N)": 0.5249865721330149,
      "MotionCritic": -4.93710470199585,
      "VeMo (human-opt view)": 0.05037094884810621,
      "VeMo (max entropy view)": 0.05037094884810621,
      "VeMo (min entropy view)": 0.04746580852775543,
      "VeMo (random view)": 0.04746580852775543,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward stiffly with both arms out in front of them."
  },
  "006245": {
    "text": "a person holds their arms near their face and searches right and left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.27424859580449407,
      "Minus Multimodal Distance": -8.944671630859375,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.574207610450685e-05,
      "MoBERT-F": 0.318703818992919,
      "MoBERT-N": 0.4489174716282073,
      "MoBERT-min(F/N)": 0.318703818992919,
      "MoBERT-max(F/N)": 0.4489174716282073,
      "MotionCritic": -7.300113677978516,
      "VeMo (human-opt view)": 0.9626955475330926,
      "VeMo (max entropy view)": 0.9626955475330926,
      "VeMo (min entropy view)": 0.9628061687329906,
      "VeMo (random view)": 0.9628061687329906,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person holds their arms near their face and looks right and left."
  },
  "008082": {
    "text": "the person was reaching to the right to pass something to the left.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.6181233934525868,
      "Minus Multimodal Distance": -2.7295796871185303,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.2376227207132615e-05,
      "MoBERT-F": 0.3221633959969928,
      "MoBERT-N": 0.42894907338549115,
      "MoBERT-min(F/N)": 0.3221633959969928,
      "MoBERT-max(F/N)": 0.42894907338549115,
      "MotionCritic": -4.398036956787109,
      "VeMo (human-opt view)": 0.6651270207852193,
      "VeMo (max entropy view)": 0.6651270207852193,
      "VeMo (min entropy view)": 0.6931106471816284,
      "VeMo (random view)": 0.6931106471816284,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The person was reaching to the right to pass something to the left."
  },
  "013846": {
    "text": "a person using the right arm to reach for something.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3083944172789372,
      "Minus Multimodal Distance": -5.131781578063965,
      "R1-Precision": 1.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.628893344081007e-05,
      "MoBERT-F": 0.3191251298619438,
      "MoBERT-N": 0.4190212748495311,
      "MoBERT-min(F/N)": 0.3191251298619438,
      "MoBERT-max(F/N)": 0.4190212748495311,
      "MotionCritic": -8.789693832397461,
      "VeMo (human-opt view)": 0.9820278060359444,
      "VeMo (max entropy view)": 0.9723756906077348,
      "VeMo (min entropy view)": 0.9820278060359444,
      "VeMo (random view)": 0.9723756906077348,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is using the right arm to reach for something."
  },
  "002718": {
    "text": "a person lunges forward bending their left knee and elbow.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.7618199146836226,
      "Minus Multimodal Distance": -12.265910148620605,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.667587877018377e-05,
      "MoBERT-F": 0.48801779254530575,
      "MoBERT-N": 0.5958241407666104,
      "MoBERT-min(F/N)": 0.48801779254530575,
      "MoBERT-max(F/N)": 0.5958241407666104,
      "MotionCritic": -4.110088348388672,
      "VeMo (human-opt view)": 0.8599605522682445,
      "VeMo (max entropy view)": 0.8593238822246456,
      "VeMo (min entropy view)": 0.8599605522682445,
      "VeMo (random view)": 0.8599605522682445,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person lunges forward, bending their left knee and elbow."
  },
  "008565": {
    "text": "someone slowly and dramatically walks forward in a straight line.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.3208051898145797,
      "Minus Multimodal Distance": -9.783259391784668,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.409795605577528e-05,
      "MoBERT-F": 0.41017787006599865,
      "MoBERT-N": 0.543369979556813,
      "MoBERT-min(F/N)": 0.41017787006599865,
      "MoBERT-max(F/N)": 0.543369979556813,
      "MotionCritic": -4.612512588500977,
      "VeMo (human-opt view)": 0.9241179313678105,
      "VeMo (max entropy view)": 0.9241179313678105,
      "VeMo (min entropy view)": 0.932229377491567,
      "VeMo (random view)": 0.9241179313678105,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "Someone walks forward slowly and dramatically in a straight line."
  },
  "007689": {
    "text": "a person walks forward and raises their arms in victory.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.493186255612534,
      "Minus Multimodal Distance": -10.165475845336914,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0002545579627621919,
      "MoBERT-F": 0.45877742118927667,
      "MoBERT-N": 0.5200114461483388,
      "MoBERT-min(F/N)": 0.45877742118927667,
      "MoBERT-max(F/N)": 0.5200114461483388,
      "MotionCritic": -6.955479145050049,
      "VeMo (human-opt view)": 0.9890044576523032,
      "VeMo (max entropy view)": 0.9890044576523032,
      "VeMo (min entropy view)": 0.9908716663683551,
      "VeMo (random view)": 0.9908716663683551,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward and raises their arms in victory."
  },
  "003483": {
    "text": "a man stands still and then starts dancing around.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7605716389101919,
      "Minus Multimodal Distance": -7.799056053161621,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.0004871312703471631,
      "MoBERT-F": 0.5552157620504654,
      "MoBERT-N": 0.5501302195911173,
      "MoBERT-min(F/N)": 0.5501302195911173,
      "MoBERT-max(F/N)": 0.5552157620504654,
      "MotionCritic": -6.4047980308532715,
      "VeMo (human-opt view)": 0.9199839163650985,
      "VeMo (max entropy view)": 0.9199839163650985,
      "VeMo (min entropy view)": 0.9285389167045972,
      "VeMo (random view)": 0.9199839163650985,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A man stands still and then starts dancing around."
  },
  "002661": {
    "text": "a person throws something upwards with two hands.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.9874214579973253,
      "Minus Multimodal Distance": -7.324192523956299,
      "R1-Precision": 0.0,
      "R2-Precision": 1.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.00047710174112580717,
      "MoBERT-F": 0.48267212410555105,
      "MoBERT-N": 0.5385763415375534,
      "MoBERT-min(F/N)": 0.48267212410555105,
      "MoBERT-max(F/N)": 0.5385763415375534,
      "MotionCritic": -7.095454692840576,
      "VeMo (human-opt view)": 0.9756496828320033,
      "VeMo (max entropy view)": 0.9399545602077247,
      "VeMo (min entropy view)": 0.9756496828320033,
      "VeMo (random view)": 0.9756496828320033,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person throws something upward with both hands."
  },
  "003868": {
    "text": "a person taking something from a shelf.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.20008564331940648,
      "Minus Multimodal Distance": -5.465868949890137,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 2.523165130696725e-05,
      "MoBERT-F": 0.33399348338098966,
      "MoBERT-N": 0.4011443455394948,
      "MoBERT-min(F/N)": 0.33399348338098966,
      "MoBERT-max(F/N)": 0.4011443455394948,
      "MotionCritic": -11.326904296875,
      "VeMo (human-opt view)": 2.019281557840802e-05,
      "VeMo (max entropy view)": 0.0008580492373168064,
      "VeMo (min entropy view)": 2.019281557840802e-05,
      "VeMo (random view)": 2.019281557840802e-05,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person is taking something from a shelf."
  },
  "012032": {
    "text": "a person jogs forward for several seconds.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.7159133328055722,
      "Minus Multimodal Distance": -4.688171863555908,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 7.979268411872908e-05,
      "MoBERT-F": 0.49104951402659847,
      "MoBERT-N": 0.6302799146116191,
      "MoBERT-min(F/N)": 0.49104951402659847,
      "MoBERT-max(F/N)": 0.6302799146116191,
      "MotionCritic": -8.037941932678223,
      "VeMo (human-opt view)": 0.9902956039533116,
      "VeMo (max entropy view)": 0.9647812166488794,
      "VeMo (min entropy view)": 0.9902956039533116,
      "VeMo (random view)": 0.9647812166488794,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person jogs forward for several seconds."
  },
  "006844": {
    "text": "a person falls to their knees.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.5131416870942941,
      "Minus Multimodal Distance": -13.3744535446167,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.668592631816864,
      "MoBERT-F": 0.6138533815184977,
      "MoBERT-N": 0.5524943168511361,
      "MoBERT-min(F/N)": 0.5524943168511361,
      "MoBERT-max(F/N)": 0.6138533815184977,
      "MotionCritic": -7.427525043487549,
      "VeMo (human-opt view)": 0.9976892567684084,
      "VeMo (max entropy view)": 0.9976892567684084,
      "VeMo (min entropy view)": 0.9985041053996254,
      "VeMo (random view)": 0.9985041053996254,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person falls to their knees."
  },
  "001369": {
    "text": "a person walks forward, raises his right arm in front of him, then lowers his arm and walks backwards.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -0.4077472505968804,
      "Minus Multimodal Distance": -7.006435871124268,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 2.7350768505129963e-05,
      "MoBERT-F": 0.32126943286261067,
      "MoBERT-N": 0.46353371527155535,
      "MoBERT-min(F/N)": 0.32126943286261067,
      "MoBERT-max(F/N)": 0.46353371527155535,
      "MotionCritic": -11.856035232543945,
      "VeMo (human-opt view)": 0.955291454442558,
      "VeMo (max entropy view)": 0.9400778210116731,
      "VeMo (min entropy view)": 0.955291454442558,
      "VeMo (random view)": 0.9400778210116731,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person walks forward, raises his right arm in front of himself, then lowers his arm and walks backwards."
  },
  "012716": {
    "text": "a person continuously jogs counter clockwise.",
    "oracle human label": 1,
    "score": {
      "Minus L1 Distance": -1.0018897708887775,
      "Minus Multimodal Distance": -4.480542182922363,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 1.0,
      "MoBERT-base": 0.0110296830534935,
      "MoBERT-F": 0.6564688106292038,
      "MoBERT-N": 0.7392511204376232,
      "MoBERT-min(F/N)": 0.6564688106292038,
      "MoBERT-max(F/N)": 0.7392511204376232,
      "MotionCritic": -10.797002792358398,
      "VeMo (human-opt view)": 0.859538784067086,
      "VeMo (max entropy view)": 0.859538784067086,
      "VeMo (min entropy view)": 0.9241730279898219,
      "VeMo (random view)": 0.9241730279898219,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person continuously jogs counter - clockwise."
  },
  "009968": {
    "text": "the man puts the box down and runs",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -1.1825312256177707,
      "Minus Multimodal Distance": -11.648530006408691,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.9964951872825623,
      "MoBERT-F": 0.796154121743317,
      "MoBERT-N": 0.6995647902335305,
      "MoBERT-min(F/N)": 0.6995647902335305,
      "MoBERT-max(F/N)": 0.796154121743317,
      "MotionCritic": -5.868832588195801,
      "VeMo (human-opt view)": 0.009696056267269531,
      "VeMo (max entropy view)": 0.013290802764486975,
      "VeMo (min entropy view)": 0.009696056267269531,
      "VeMo (random view)": 0.013290802764486975,
      "User-1 Score": 1.0,
      "User-2 Score": 1.0
    },
    "revised_text": "The man puts the box down and runs."
  },
  "014387": {
    "text": "a person walks forward  to bow then place hands on shoulders of other person and bows again.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.49122205452993534,
      "Minus Multimodal Distance": -7.2302069664001465,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.2415560632944107,
      "MoBERT-F": 0.5856097163493457,
      "MoBERT-N": 0.5360291274928882,
      "MoBERT-min(F/N)": 0.5360291274928882,
      "MoBERT-max(F/N)": 0.5856097163493457,
      "MotionCritic": -5.370314598083496,
      "VeMo (human-opt view)": 0.37751004016064255,
      "VeMo (max entropy view)": 0.43799472295514513,
      "VeMo (min entropy view)": 0.37751004016064255,
      "VeMo (random view)": 0.37751004016064255,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "A person walks forward to bow, then places hands on the shoulders of the other person and bows again."
  },
  "006089": {
    "text": "a person stands still and then bounces their hand as if playing with a yo-yo",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.29051032080412653,
      "Minus Multimodal Distance": -8.037364959716797,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 6.186048995004967e-05,
      "MoBERT-F": 0.2569851037237305,
      "MoBERT-N": 0.38201304752468196,
      "MoBERT-min(F/N)": 0.2569851037237305,
      "MoBERT-max(F/N)": 0.38201304752468196,
      "MotionCritic": -7.082686901092529,
      "VeMo (human-opt view)": 0.6797385620915033,
      "VeMo (max entropy view)": 0.6797385620915033,
      "VeMo (min entropy view)": 0.7426597582037997,
      "VeMo (random view)": 0.6797385620915033,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands still and then bounces their hand as if playing with a yo - yo."
  },
  "006241": {
    "text": "the person is jogging with dumbbells.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.9493190601533471,
      "Minus Multimodal Distance": -7.0062785148620605,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 3.7501449696719646e-05,
      "MoBERT-F": 0.48653949014845355,
      "MoBERT-N": 0.5391105477593747,
      "MoBERT-min(F/N)": 0.48653949014845355,
      "MoBERT-max(F/N)": 0.5391105477593747,
      "MotionCritic": -1.339268684387207,
      "VeMo (human-opt view)": 2.80803315767938e-06,
      "VeMo (max entropy view)": 2.81955089861575e-06,
      "VeMo (min entropy view)": 2.80803315767938e-06,
      "VeMo (random view)": 2.80803315767938e-06,
      "User-1 Score": 0.0,
      "User-2 Score": 0.0
    },
    "revised_text": "The person is jogging with dumbbells."
  },
  "007286": {
    "text": "a person stands at the edge of a cliff trying to steel themselves for a jump, but failing to jump off.",
    "oracle human label": 0,
    "score": {
      "Minus L1 Distance": -0.5320228832863031,
      "Minus Multimodal Distance": -5.006595611572266,
      "R1-Precision": 0.0,
      "R2-Precision": 0.0,
      "R3-Precision": 0.0,
      "MoBERT-base": 0.7766187191009521,
      "MoBERT-F": 0.7167023461334531,
      "MoBERT-N": 0.7887856396066762,
      "MoBERT-min(F/N)": 0.7167023461334531,
      "MoBERT-max(F/N)": 0.7887856396066762,
      "MotionCritic": -13.057809829711914,
      "VeMo (human-opt view)": 0.09511568123393316,
      "VeMo (max entropy view)": 0.2939521800281294,
      "VeMo (min entropy view)": 0.09511568123393316,
      "VeMo (random view)": 0.09511568123393316,
      "User-1 Score": 0.0,
      "User-2 Score": 1.0
    },
    "revised_text": "A person stands at the edge of a cliff, trying to steel themselves for a jump but failing to jump off."
  }
}