# Training configuration: a single `params` tree holding the seed, the
# algorithm / model / network selections, and the run `config`.
# NOTE(review): the layout looks like an rl_games-style config — confirm
# against the consumer.
params:
  # Seed value; the same number is repeated under config.env_config.seed.
  seed: 5
  # Algorithm, selected by name.
  algo:
    name: sac

  # Model, selected by name.
  model:
    name: soft_actor_critic

  # Network definition, selected by name.
  network:
    name: soft_actor_critic
    separate: true
    space:
      # Only the key is given; its value is null.
      continuous: null
    mlp:
      units: [256, 128, 64]
      activation: relu
      initializer:
        name: default
    # NOTE(review): presumably the [min, max] bounds applied to the policy's
    # log-std output — confirm against the network builder.
    log_std_bounds: [-5, 2]

  # Run-level settings: experiment name, environment backend, schedule,
  # checkpointing and learning hyperparameters.
  config:
    name: HalfCheetah-v4_SAC
    env_name: envpool
    normalize_input: true
    reward_shaper:
      scale_value: 1.0

    max_epochs: 40000
    num_steps_per_episode: 2
    save_best_after: 500
    save_frequency: 1000
    gamma: 0.99
    init_alpha: 1.0
    # Learning rates are written in plain decimal form on purpose: exponent
    # notation without a decimal point (e.g. 5e-3) is resolved as a *string*,
    # not a float, by YAML 1.1 loaders such as PyYAML.
    alpha_lr: 0.005
    actor_lr: 0.0005
    critic_lr: 0.0005
    critic_tau: 0.005
    batch_size: 2048
    learnable_temperature: true
    # Total number of warmup steps:
    #   num_actors * num_steps_per_episode * num_warmup_steps
    #   (32 * 2 * 50 = 3200 with the values in this file).
    num_warmup_steps: 50
    replay_buffer_size: 1000000
    num_actors: 32

    # Environment settings; the seed repeats the value of params.seed.
    env_config:
      env_name: HalfCheetah-v4
      seed: 5

    # Player settings.
    # NOTE(review): presumably used for evaluation / playback runs rather
    # than training — confirm against the consumer.
    player:
      render: true
      deterministic: true
      games_num: 100