# @package hp_student

# DDPG agents
agents:
  # Actions: exploration-noise options plus the action bound and magnitude.
  action:
    # null or OU noise.
    # NOTE(review): confirm the exact value the consumer expects for OU noise.
    add_noise: null
    noise_factor: 1
    # Written as 1.0e+6 rather than 1e6: YAML 1.1 loaders (e.g. PyYAML) only
    # resolve a float when the mantissa has a dot and the exponent a sign, so
    # a bare 1e6 would be loaded as the string "1e6" instead of a number.
    noise_half_decay_time: 1.0e+6
    # [lower, upper] pair — presumably the range of the raw action; confirm.
    bound: [-1, 1]
    # NOTE(review): presumably scales the bounded action; verify at the caller.
    magnitude: 50

  # Actor-Critic settings, grouped by purpose. Key order inside a YAML
  # mapping carries no meaning, so the grouping is purely for readability.

  # Optimisation hyperparameters.
  learning_rate_actor: 0.0003
  learning_rate_critic: 0.0003
  gamma_discount: 0.99
  # NOTE(review): presumably the soft target-update coefficient — confirm.
  soft_alpha: 0.005
  initial_loss: 100
  add_target_action_noise: true
  iteration_times: 3

  # Taylor-network switches (both disabled here).
  use_taylor_nn: false
  taylor_editing: false

  # Values pulled in through interpolation; change them in the `logger` /
  # `general` config nodes they point at rather than in this file.
  model_path: ${logger.model_save_dir}
  training_by_steps: ${general.training_by_steps}
  max_training_steps: ${general.max_training_steps}
  max_training_episodes: ${general.max_training_episodes}
  max_steps_per_episode: ${general.max_steps_per_episode}
  evaluation_period: ${general.evaluation_period}
  max_evaluation_steps: ${general.max_evaluation_steps}

  # Replay buffer
  # Buffer sizes are spelled out as plain integers instead of exponent
  # literals (2e5 / 1e5). A dot-less exponent literal is loaded as a string
  # by YAML 1.1 loaders such as PyYAML and as a float by loaders that accept
  # it, so it is never an integer count; the explicit integer is unambiguous.
  replay_buffer:

    # Solo replay buffer: one buffer with its own batch/prefill/capacity.
    solo_buffer:
      batch_size: 512
      buffer_size: 200000
      experience_prefill_size: 512

    # Dual replay buffer: batch and prefill sizes are shared, while the
    # student and teacher sub-buffers each declare their own capacity.
    dual_buffer:
      batch_size: 512
      experience_prefill_size: 512

      # Student
      student:
        buffer_size: 100000

      # Teacher
      teacher:
        buffer_size: 100000

  # Unknown distribution
  # NOTE(review): `apply` is presumably a feature toggle (off here); what is
  # applied when it is true is not visible in this file — confirm at the caller.
  unknown_distribution:
    apply: false

  # Checkpoint
  # Not set here: the value is resolved from `general.checkpoint` through
  # interpolation, so change it in the `general` config node.
  checkpoint: ${general.checkpoint}