# Ant-v2 experiment configuration, ME-TRPO style (model-ensemble TRPO).
# Run hyperparameters; every setting lives under the single top-level `args` key.
# NOTE(review): the per-key notes below are inferred from the key names alone,
# not from the consuming code -- confirm each one before relying on it.
args:
    env_name: Ant-v2  # environment identifier string
    outer_steps: 3000  # presumably the number of outer training iterations
    policy_iters: 200  # presumably policy updates per outer step -- confirm
    model_epochs: 2000  # presumably training epochs for the learned model(s)
    reward_head: false  # boolean flag; canonical lowercase spelling
    update_timestep: 50000  # TODO confirm unit (environment steps?)
    steps: 100  # TODO confirm meaning (rollout horizon?)
    num_models: 5  # presumably the size of the model ensemble
    max_timesteps: 100000  # TODO confirm unit (environment steps?)
    pca: 0  # presumably 0 disables this option -- confirm
    lam: 0  # integer 0 kept as-is; TODO confirm whether a float is expected