# === Mujoco Envs ===

# Base configuration for the inverted-pendulum entries; the entries below that
# merge `*inverted-pendulum` start from these values.
InvertedPendulum-v4: &inverted-pendulum
  # Policy and training budget
  policy: 'MlpPolicy'
  policy_kwargs: "dict(net_arch=[256, 256])"
  n_timesteps: !!float 1e6
  # Optimisation
  learning_rate: !!float 1e-4
  gamma: 0.9999
  # Buffer and update schedule
  buffer_size: 200000
  learning_starts: 10000
  train_freq: 256
  # NOTE(review): -1 is presumably the library's "match the number of collected
  # steps" sentinel — confirm against the training library's documentation.
  gradient_steps: -1
  # Noise settings
  noise_type: 'normal'
  noise_std: 0.1

# Reuses the InvertedPendulum-v4 settings unchanged through the merge key.
InvertedPendulumWide-v4:
  <<: *inverted-pendulum

# Reuses the InvertedPendulum-v4 settings unchanged. gamma (0.9999) is already
# supplied by the merged anchor, so it is not repeated here.
InvertedDoublePendulum-v4:
  <<: *inverted-pendulum

# Reuses the InvertedPendulum-v4 settings unchanged through the merge key.
InvertedDoublePendulumWide-v4:
  <<: *inverted-pendulum

# Shared baseline for the MuJoCo entries below, which merge `*mujoco-defaults`
# and override individual keys where needed.
HalfCheetah-v4: &mujoco-defaults
  policy: 'MlpPolicy'
  n_timesteps: !!float 1e6
  learning_starts: 10000
  # Noise settings
  noise_std: 0.1
  noise_type: 'normal'

# Uses the shared MuJoCo defaults with no overrides.
Ant-v4:
  <<: *mujoco-defaults

# Uses the shared MuJoCo defaults with no overrides.
Hopper-v4:
  <<: *mujoco-defaults

# Uses the shared MuJoCo defaults with no overrides.
Walker2d-v4:
  <<: *mujoco-defaults

# Humanoid entries: the shared MuJoCo defaults plus the overrides below.
# n_timesteps (1e6) is already supplied by `*mujoco-defaults`, so it is not
# repeated here.
Humanoid-v4: &humanoid-defaults
  <<: *mujoco-defaults
  # NOTE(review): the original comment labelled these "SAC Hyperparams" —
  # presumably the values were borrowed from the SAC configuration; confirm.
  train_freq: 1
  gradient_steps: 1
  learning_rate: !!float 3e-4
  batch_size: 256

# Same settings as Humanoid-v4, so it merges that entry instead of repeating
# every key.
HumanoidStandup-v4:
  <<: *humanoid-defaults

# Shared MuJoCo defaults with overrides for gamma, learning rate, batch size
# and the update schedule.
Swimmer-v4:
  <<: *mujoco-defaults
  # Optimisation
  learning_rate: !!float 1e-4
  gamma: 0.9999
  batch_size: 256
  # Update schedule
  train_freq: 1
  gradient_steps: 1

  
# === Panda Envs ===
# Base configuration for the Panda entries below, which merge `*panda`.
PandaPush-v3: &panda
  # Policy and training budget
  policy: 'MlpPolicy'
  policy_kwargs: "dict(net_arch=[256,256,256])"
  n_timesteps: !!float 1e6
  # Optimisation
  learning_rate: 0.001
  gamma: 0.95
  tau: 0.05
  batch_size: 256
  # Buffer and update schedule
  buffer_size: 1000000
  learning_starts: 1000
  train_freq: 2
  gradient_steps: 1
  # Noise settings
  noise_type: 'normal'
  noise_std: 0.2
  # NOTE(review): only the Panda entries set this key in the visible part of
  # the file — confirm the training code actually consumes it.
  random_action_prob: 0.3

# Reuses the PandaPush-v3 settings unchanged through the merge key.
PandaSlide-v3:
  <<: *panda

# PandaPush-v3 settings with a larger batch size (512 instead of the
# inherited 256).
PandaPickAndPlace-v3:
  <<: *panda
  batch_size: 512

# PandaPush-v3 settings with a larger batch size (512 instead of the
# inherited 256).
PandaFlip-v3:
  <<: *panda
  batch_size: 512

# Reuses the PandaPush-v3 settings unchanged through the merge key.
PandaStack-v3:
  <<: *panda

# Reuses the PandaPush-v3 settings unchanged through the merge key.
PandaReach-v3:
  <<: *panda

# Configuration for Goal2D-v0, exposed through the `predator_prey` anchor.
# NOTE(review): the anchor name does not match the entry name — confirm it is
# intentional and that the entries meant to merge it do so.
Goal2D-v0: &predator_prey
  # Policy and training budget
  policy: 'MlpPolicy'
  policy_kwargs: "dict(net_arch=[64,64])"
  n_timesteps: !!float 1e6
  # Optimisation
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 128
  # Buffer and update schedule
  buffer_size: 1000000
  learning_starts: 10000
  train_freq: 64
  gradient_steps: 32
  # Noise settings
  noise_type: 'normal'
  noise_std: 0.1