# Hopper-v2, ME-TRPO style (header originally said "halfcheetah" -- TODO confirm which env is intended)
# Experiment arguments, grouped under a single `args` mapping.
# NOTE(review): the per-key notes below are inferred from the key names only;
# the code that consumes this file is the source of truth -- confirm there.
args:
    # Environment identifier (presumably a Gym/MuJoCo task id).
    env_name: Hopper-v2

    # Model head switches (presumably: also predict reward / log-variance).
    # Written as canonical lowercase booleans so every YAML loader, including
    # strict YAML 1.2 ones, parses them as booleans rather than strings.
    reward_head: true
    logvar_head: true

    # Presumably the strategy for choosing rollout start states -- confirm.
    states: 'uniform'

    # Presumably a step count (in environment timesteps) -- confirm the unit.
    update_timestep: 50000

    # Four-element list; the meaning of each position is defined by the
    # consumer (looks like a rollout-length schedule -- TODO confirm).
    steps_k: [1, 15, 20, 100]

    # Rollout and policy-update cadence.
    num_rollouts_per_step: 400
    policy_update_steps: 20
    train_policy_every: 1

    # Fractions in [0, 1]; presumably the validation split and the share of
    # real (non-model) samples per batch -- confirm.
    train_val_ratio: 0.2
    real_sample_ratio: 0.05

    # Presumably how often (in timesteps) the model is retrained -- confirm.
    model_update_freq: 250

    # Presumably the total training budget in environment timesteps.
    max_timesteps: 100000

    # Ensemble sizing: 7 models, 5 of them designated "elites" (presumably
    # the best-performing members kept for rollouts -- confirm).
    num_models: 7
    num_elites: 5