[
  {
    "label": "policymodel"
  },
  {
    "label": "grpo"
  },
  {
    "label": "policymodel"
  },
  {
    "label": "dpo"
  },
  {
    "label": "referencemodel"
  },
  {
    "label": "ref"
  },
  {
    "label": "trainingloop"
  },
  {
    "label": "replaybuffer"
  },
  {
    "label": "rollout"
  },
  {
    "label": "verifier"
  },
  {
    "label": "chosen"
  },
  {
    "label": "rejected"
  },
  {
    "label": "dposample"
  },
  {
    "label": "sampleindex"
  },
  {
    "label": "klconstraint"
  },
  {
    "label": "paramupdate"
  },
  {
    "label": "metaupdate"
  },
  {
    "label": "verifier"
  },
  {
    "label": "output"
  },
  {
    "label": "o"
  },
  {
    "label": "o"
  },
  {
    "label": "o"
  },
  {
    "label": "o"
  },
  {
    "label": "multisampleoutputs"
  },
  {
    "label": "reward"
  },
  {
    "label": "rewardfunction"
  },
  {
    "label": "sampling"
  },
  {
    "label": "r"
  },
  {
    "label": "r"
  },
  {
    "label": "r"
  },
  {
    "label": "r"
  },
  {
    "label": "rewardscores"
  },
  {
    "label": "groupcomputation"
  },
  {
    "label": "norm"
  },
  {
    "label": "a"
  },
  {
    "label": "a"
  },
  {
    "label": "a"
  },
  {
    "label": "a"
  },
  {
    "label": "advantagescores"
  },
  {
    "label": "advantage"
  },
  {
    "label": "o"
  },
  {
    "label": "samplingoutput"
  },
  {
    "label": "r"
  },
  {
    "label": "rewardscore"
  },
  {
    "label": "a"
  },
  {
    "label": "advantagescore"
  },
  {
    "label": "r"
  },
  {
    "label": "thinkcontent"
  },
  {
    "label": "a"
  },
  {
    "label": "answercontent"
  },
  {
    "label": "g"
  },
  {
    "label": "groundtruth"
  },
  {
    "label": "klconstraint"
  },
  {
    "label": "othinkrthinkansweraanswer"
  },
  {
    "label": "r"
  },
  {
    "label": "tokenizer"
  },
  {
    "label": "lengthcheck"
  },
  {
    "label": "charlim"
  },
  {
    "label": "replaybuffer"
  },
  {
    "label": "r"
  },
  {
    "label": "a"
  },
  {
    "label": "llm"
  },
  {
    "label": "fusion"
  },
  {
    "label": "consistencycheck"
  },
  {
    "label": "r"
  },
  {
    "label": "rule"
  },
  {
    "label": "heuristic"
  },
  {
    "label": "repetitioncheck"
  },
  {
    "label": "a"
  },
  {
    "label": "g"
  },
  {
    "label": "compare"
  },
  {
    "label": "softmatch"
  },
  {
    "label": "accuracycheck"
  },
  {
    "label": "referencemodel"
  },
  {
    "label": "chosen"
  },
  {
    "label": "rejected"
  },
  {
    "label": "chosen"
  },
  {
    "label": "rejected"
  },
  {
    "label": "auxiliarypath"
  },
  {
    "label": "diversitycheck"
  },
  {
    "label": "o"
  },
  {
    "label": "o"
  },
  {
    "label": "similarity"
  },
  {
    "label": "chosen"
  },
  {
    "label": "rejected"
  },
  {
    "label": "biasfilter"
  },
  {
    "label": "o"
  },
  {
    "label": "stat"
  },
  {
    "label": "chosen"
  },
  {
    "label": "rejected"
  }
]