[
  {
    "label": "ppoextendedprocess"
  },
  {
    "label": "q"
  },
  {
    "label": "rolloutmodule"
  },
  {
    "label": "simulationllm"
  },
  {
    "label": "policymodel"
  },
  {
    "label": "contextualadapter"
  },
  {
    "label": "o"
  },
  {
    "label": "valuemodel"
  },
  {
    "label": "calibrationmodule"
  },
  {
    "label": "rewardmodel"
  },
  {
    "label": "referencemodel"
  },
  {
    "label": "v"
  },
  {
    "label": "r"
  },
  {
    "label": "gaeaggregationmodule"
  },
  {
    "label": "a"
  },
  {
    "label": "policymodel"
  },
  {
    "label": "grpoextendedprocess"
  },
  {
    "label": "q"
  },
  {
    "label": "rolloutmodule"
  },
  {
    "label": "simulationllm"
  },
  {
    "label": "policymodel"
  },
  {
    "label": "diversitymodule"
  },
  {
    "label": "o"
  },
  {
    "label": "o"
  },
  {
    "label": "og"
  },
  {
    "label": "rewardmodel"
  },
  {
    "label": "referencemodel"
  },
  {
    "label": "diversityevaluator"
  },
  {
    "label": "r"
  },
  {
    "label": "r"
  },
  {
    "label": "rg"
  },
  {
    "label": "groupcomputation"
  },
  {
    "label": "q"
  },
  {
    "label": "a"
  },
  {
    "label": "a"
  },
  {
    "label": "ag"
  },
  {
    "label": "policymodel"
  },
  {
    "label": "trainedmodel"
  },
  {
    "label": "frozenmodel"
  },
  {
    "label": "simulationllm"
  },
  {
    "label": "contextualadapter"
  },
  {
    "label": "calibrationmodule"
  },
  {
    "label": "diversitymoduleevaluator"
  }
]