# Dotted path of the object this config describes (Hydra `_target_` convention;
# presumably consumed by hydra.utils.instantiate — confirm against the caller).
_target_: lambda_ac.qpn.QPN

# `_self_` is listed first, so the keys of this file are composed before the
# config-group selections below, which are merged afterwards.
defaults:
  - _self_
  - model: identity
  - critic: mlp
  - actor: deterministic_actor
  # actor_forward is the default planning strategy.
  - planning_strategy: actor_forward

# Reuses the critic configuration through an absolute interpolation; this
# assumes the file is composed under the `agent` key — confirm.
critic_target: ${agent.critic}

# Scalar settings passed to the target class. Their exact semantics are defined
# there; the names suggest learning rates, discount factors, target-update and
# gradient-clipping settings, and rollout depths — confirm against the class.
lr: 0.001
actor_lr: 0.001
model_lr: 0.001
encoder_lr: 0.001
critic_gamma: 0.99
actor_gamma: 0.99
tau: 0.01
target_update_interval: 1
model_grad_clip: 10
rl_grad_clip: 10
n_step_target_depth: 1
# The two rollout depths below are also read by the horizon schedulers in this
# file via ${agent.actor_rollout_depth} and ${agent.critic_rollout_depth}.
actor_rollout_depth: 4
critic_rollout_depth: 4
model_train_depth: 5
automatic_entropy_tuning: false
alpha: 0.1
share_encoder: true
rho: 0.99

# Per-component encoder update switches.
update_encoder_model: true
update_encoder_critic: false
update_encoder_actor: false

# Update-rule / target-construction switches.
use_svg_policy_update: true
use_muzero_target: false
start_model_target_from_zero: false

encoder_delayed_target: true
update_encoder_every_n_steps: 2

# Termination ("done") handling switches.
discretize_done_actor: true
discretize_done_critic: true
predict_done: true

exploration_noise_model: true
td_average: true

# Network size settings (presumably shared by the agent's networks — confirm).
agent_hidden_dim: 512
agent_hidden_layers: 2

# Normalisation switches.
encoder_normalize: false
critic_spectral_norm: false
model_spectral_norm: false
depend_on_hidden: false

model_based_search: true

# Schedule for the actor horizon: starts at 0 and ends at actor_rollout_depth.
# The unit of `duration` is not visible here (presumably training steps —
# confirm against LinearSchedule).
actor_horizon_scheduler:
  _target_: lambda_ac.util.schedulers.LinearSchedule
  start: 0
  final: ${agent.actor_rollout_depth}
  duration: 25000

# Schedule for the critic horizon: starts at 0 and ends at critic_rollout_depth.
# The unit of `duration` is not visible here (presumably training steps —
# confirm against LinearSchedule).
critic_horizon_scheduler:
  _target_: lambda_ac.util.schedulers.LinearSchedule
  start: 0
  final: ${agent.critic_rollout_depth}
  duration: 25000

# List of [name, value] pairs; the second entry is presumably the weight
# applied to the named model loss — confirm against the consumer.
model_losses:
  - [vaml, 1.0]
  - [mse, 1.0]

# Absolute interpolation of the top-level `device` key, which is expected to be
# defined elsewhere in the composed config (assumes this file is not mounted at
# the root — confirm).
device: ${device}
