# Run configuration for the HalfCheetah-v4 environment.
# NOTE(review): unlike the Ant-v4, Hopper-v4 and Walker2d-v4 entries in this
# file, this entry does not set model_steps, model_grad_steps,
# clip_likelihood, likelihood or vf_compl_coef, so whatever defaults the
# consumer applies are used here — confirm that is intentional.
HalfCheetah-v4:
  # Hyperparameters handed to the training code (consumer not visible here).
  parameters:
    # Learning rate; per the original note it may also be given as a function
    # (how a non-numeric value is interpreted is up to the loader).
    learning_rate: 0.0003
    use_sde: false
    sde_sample_freq: -1
    batch_size: 256
    learning_starts: 10000
    tau: 0.005
    gamma: 0.99
    train_freq: 1
    # Project-specific settings; their exact semantics are defined by the
    # consuming code, not by this file.
    cm_w: 1
    complexity_learning_rate: 0.1
    pre_train_steps: 5000
    delay_update: 15000
    norm_parameter: 1.0
    unnormalize_model: true
    model_kwargs:
      # Kept as a plain string; presumably a schedule spec ("lin_" prefix)
      # decoded by the loader — TODO confirm.
      lr: lin_0.001
      model_arch: [128, 128, 128]
  environment:
    name: HalfCheetah-v4
    render_mode: rgb_array
  # Dotted import paths of the wrappers to apply to the environment.
  wrappers:
    - stable_baselines3.common.vec_env.VecNormalize

# Run configuration for the Ant-v4 environment.
Ant-v4:
  # Hyperparameters handed to the training code (consumer not visible here).
  parameters:
    # Learning rate; per the original note it may also be given as a function
    # (how a non-numeric value is interpreted is up to the loader).
    learning_rate: 0.0003
    use_sde: false
    sde_sample_freq: -1
    batch_size: 256
    learning_starts: 10000
    tau: 0.005
    gamma: 0.99
    train_freq: 1
    # Project-specific settings; their exact semantics are defined by the
    # consuming code, not by this file.
    cm_w: 1
    model_steps: 64
    model_grad_steps: 12
    clip_likelihood: 5
    likelihood: true
    vf_compl_coef: 0
    complexity_learning_rate: 0.1
    pre_train_steps: 5000
    delay_update: 500
    norm_parameter: 1.0
    unnormalize_model: true
    model_kwargs:
      # Kept as a plain string; presumably a schedule spec ("lin_" prefix)
      # decoded by the loader — TODO confirm.
      lr: lin_0.001
      model_arch: [256, 256, 256]
  environment:
    name: Ant-v4
    render_mode: rgb_array
  # Dotted import paths of the wrappers to apply to the environment.
  wrappers:
    - stable_baselines3.common.vec_env.VecNormalize

# Run configuration for the Hopper-v4 environment.
Hopper-v4:
  # Hyperparameters handed to the training code (consumer not visible here).
  parameters:
    # Learning rate; per the original note it may also be given as a function
    # (how a non-numeric value is interpreted is up to the loader).
    learning_rate: 0.0003
    use_sde: false
    sde_sample_freq: -1
    batch_size: 256
    learning_starts: 10000
    tau: 0.005
    gamma: 0.99
    train_freq: 1
    # Project-specific settings; their exact semantics are defined by the
    # consuming code, not by this file.
    cm_w: 1
    # NOTE(review): of the four entries in this file, only this one sets
    # constrain_complexity — confirm the others should rely on the default.
    constrain_complexity: true
    model_steps: 64
    model_grad_steps: 12
    clip_likelihood: 5
    likelihood: true
    vf_compl_coef: 0
    complexity_learning_rate: 0.1
    pre_train_steps: 5000
    delay_update: 10000
    norm_parameter: 1.0
    unnormalize_model: true
    model_kwargs:
      # Kept as a plain string; presumably a schedule spec ("lin_" prefix)
      # decoded by the loader — TODO confirm.
      lr: lin_0.001
      model_arch: [64, 64, 64]
  environment:
    name: Hopper-v4
    render_mode: rgb_array
  # Dotted import paths of the wrappers to apply to the environment.
  wrappers:
    - stable_baselines3.common.vec_env.VecNormalize

# Run configuration for the Walker2d-v4 environment.
Walker2d-v4:
  # Hyperparameters handed to the training code (consumer not visible here).
  parameters:
    # Learning rate; per the original note it may also be given as a function
    # (how a non-numeric value is interpreted is up to the loader).
    learning_rate: 0.0003
    use_sde: false
    sde_sample_freq: -1
    batch_size: 256
    learning_starts: 10000
    tau: 0.005
    gamma: 0.99
    train_freq: 1
    # Project-specific settings; their exact semantics are defined by the
    # consuming code, not by this file.
    cm_w: 1
    model_steps: 64
    model_grad_steps: 12
    clip_likelihood: 5
    likelihood: true
    vf_compl_coef: 0
    complexity_learning_rate: 0.1
    pre_train_steps: 5000
    delay_update: 500
    norm_parameter: 1.0
    unnormalize_model: true
    model_kwargs:
      # Kept as a plain string; presumably a schedule spec ("lin_" prefix)
      # decoded by the loader — TODO confirm.
      lr: lin_0.001
      model_arch: [64, 64, 64]
  environment:
    name: Walker2d-v4
    render_mode: rgb_array
  # Dotted import paths of the wrappers to apply to the environment.
  wrappers:
    - stable_baselines3.common.vec_env.VecNormalize