# @package _group_
# Identifier of this algorithm configuration.
name: "mbpo"

# Model-training switches. Their exact semantics are defined by the code that
# consumes this config, which is not visible here; presumably they control
# input normalization, delta-state targets and reward learning for the
# dynamics model -- confirm against the consumer.
normalize: true
normalize_double_precision: true
target_is_delta: true
learned_rewards: true
# Interpolation: resolved from `overrides.freq_train_model` in the composed
# config, so the value is supplied by the selected overrides, not here.
freq_train_model: ${overrides.freq_train_model}
real_data_ratio: 0.0

# Exploration / evaluation settings (names suggest initial data collection and
# the number of evaluation episodes -- verify units and usage in the caller).
sac_samples_action: true
initial_exploration_steps: 5000
random_initial_explore: false
num_eval_episodes: 20

# --------------------------------------------
#          SAC Agent configuration
# --------------------------------------------
# Hydra-instantiable agent node: `_target_` names the class to construct and
# the remaining keys of the node are passed to it as arguments.
agent:
  _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
  # `???` is OmegaConf's mandatory-missing marker: a concrete value must be
  # written into the config before this key is read (presumably derived from
  # the environment at runtime -- confirm with the code that completes it).
  num_inputs: ???
  action_space:
    # The Box space class lives in `gym.spaces`; there is no `gym.env` module,
    # so `gym.env.Box` cannot be located when this node is instantiated.
    _target_: gym.spaces.Box
    # Mandatory-missing values, to be filled in before use (see `???` above).
    low: ???
    high: ???
    shape: ???
  # SAC hyperparameters. Each `${overrides.*}` entry is an interpolation
  # resolved from the `overrides` node of the composed config; `${device}`
  # resolves from the top-level `device` key.
  args:
    gamma: ${overrides.sac_gamma}
    tau: ${overrides.sac_tau}
    alpha: ${overrides.sac_alpha}
    policy: ${overrides.sac_policy}
    target_update_interval: ${overrides.sac_target_update_interval}
    automatic_entropy_tuning: ${overrides.sac_automatic_entropy_tuning}
    target_entropy: ${overrides.sac_target_entropy}
    hidden_size: ${overrides.sac_hidden_size}
    device: ${device}
    lr: ${overrides.sac_lr}