# @package _group_
# Identifier of this agent configuration.
name: gradnorm_deepmdp
# Referenced further down in this file as the reward decoder's `feature_dim`
# (via `${agent.encoder_feature_dim}`).
encoder_feature_dim: 50
# `num_layers` and `num_filters` are not referenced anywhere in this file;
# presumably they are read by the encoder config through interpolation.
# TODO confirm against the encoder config.
num_layers: 4
num_filters: 32

# The decoder config is an interpolation of `agent.encoder.pixel`, which is
# defined outside this file.
decoder: ${agent.encoder.pixel}
# Reward decoder config: its `feature_dim` follows `encoder_feature_dim` above.
reward_decoder:
  feature_dim: ${agent.encoder_feature_dim}

# All three optimizer entries below resolve to the same node,
# `agent.optimizers.decoder` (defined outside this file).
# NOTE(review): only the first key carries the `_cfg` suffix; the builder
# section references these keys by these exact names, so any rename must be
# applied in both places.
gradnorm_optimizer_cfg: ${agent.optimizers.decoder}
reward_decoder_optimizer: ${agent.optimizers.decoder}
transition_model_optimizer: ${agent.optimizers.decoder}


# Construction recipe for the agent. The outer `_target_` is
# `mtrl.agent.gradnorm.Agent`; it is given a nested `agent_cfg` whose own
# `_target_` is `mtrl.agent.deepmdp.Agent`, plus a top-level `multitask_cfg`.
# NOTE(review): presumably the gradnorm agent wraps the deepmdp agent built
# from `agent_cfg` -- confirm against mtrl.agent.gradnorm.Agent.
builder:
  _target_: mtrl.agent.gradnorm.Agent
  agent_cfg:
    _target_: mtrl.agent.deepmdp.Agent
    # The three names below are listed as placeholders only; no value is set
    # for them in this file. Presumably they are supplied by the caller when
    # the agent is instantiated -- TODO confirm.
    # obs_shape
    # action_shape
    # device
    # Component configs. `agent.decoder` and `agent.reward_decoder` are
    # defined in this file; `agent.actor`, `agent.critic`,
    # `agent.transition_model`, `agent.multitask` and `agent.gradnorm` are
    # not, and must be provided by other config files.
    actor_cfg: ${agent.actor}
    critic_cfg: ${agent.critic}
    decoder_cfg: ${agent.decoder}
    reward_decoder_cfg: ${agent.reward_decoder}
    transition_model_cfg: ${agent.transition_model}
    multitask_cfg: ${agent.multitask}
    gradnorm_cfg: ${agent.gradnorm}
    # Optimizer configs. The first five interpolate `agent.optimizers.*`
    # directly (defined outside this file); the last three go through the
    # top-level aliases in this file, each of which points at
    # `agent.optimizers.decoder`.
    alpha_optimizer_cfg: ${agent.optimizers.alpha}
    actor_optimizer_cfg: ${agent.optimizers.actor}
    critic_optimizer_cfg: ${agent.optimizers.critic}
    decoder_optimizer_cfg: ${agent.optimizers.decoder}
    encoder_optimizer_cfg: ${agent.optimizers.encoder}
    gradnorm_optimizer_cfg: ${agent.gradnorm_optimizer_cfg}
    reward_decoder_optimizer_cfg: ${agent.reward_decoder_optimizer}
    transition_model_optimizer_cfg: ${agent.transition_model_optimizer}
    # Scalar hyperparameters for the nested agent. Their exact semantics are
    # defined by mtrl.agent.deepmdp.Agent, which is not visible from here.
    discount: 0.99
    init_temperature: 0.1
    actor_update_freq: 2
    critic_tau: 0.01
    critic_target_update_freq: 2
    encoder_tau: 0.05
    decoder_update_freq: 1
    decoder_latent_lambda: 0.0
  # The same `agent.multitask` node is also given to the outer target, in
  # addition to the copy inside `agent_cfg` above.
  multitask_cfg: ${agent.multitask}
