# Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
name: sac

# Constructor arguments for the class named by `_target_`.
# Keys set to `???` have no value in this file and must be filled in before
# they are read (for example by the launching script or a command-line
# override).
agent:
  _target_: agent.sac_expert.SACAgent
  obs_dim: ??? # to be specified later
  action_dim: ??? # to be specified later
  action_range: ??? # to be specified later
  goal_dim: ??? # to be specified later
  env_id_dim: ??? # to be specified later
  device: ${device}  # resolves to the top-level `device` key
  critic_cfg: ${double_q_critic}  # resolves to the top-level `double_q_critic` mapping
  actor_cfg: ${diag_gaussian_actor}  # resolves to the top-level `diag_gaussian_actor` mapping
  discount: 0.99
  init_temperature: 0.1
  # Learning rates are written with an explicit decimal point (`1.0e-4`, not
  # `1e-4`) so that YAML 1.1 loaders also read them as floats, not strings.
  alpha_lr: 1.0e-4
  alpha_betas: [0.9, 0.999]
  actor_lr: 1.0e-4
  actor_betas: [0.9, 0.999]
  actor_update_frequency: 1
  critic_lr: 1.0e-4
  critic_betas: [0.9, 0.999]
  critic_tau: 0.005
  critic_target_update_frequency: 2
  batch_size: 1024
  learnable_temperature: true

# Critic settings; referenced from `agent.critic_cfg` via `${double_q_critic}`.
double_q_critic:
  _target_: agent.critic.DoubleQCritic
  # Sizes fixed in this file.
  hidden_dim: 1024
  hidden_depth: 2
  output_dim: 1
  # Sizes copied from the `agent` section through interpolation, so they stay
  # in sync with whatever values the agent is given.
  obs_dim: ${agent.obs_dim}
  goal_dim: ${agent.goal_dim}
  env_id_dim: ${agent.env_id_dim}
  action_dim: ${agent.action_dim}

# Actor settings; referenced from `agent.actor_cfg` via `${diag_gaussian_actor}`.
diag_gaussian_actor:
  _target_: agent.actor.DiagGaussianActor
  # Sizes and bounds fixed in this file.
  hidden_dim: 1024
  hidden_depth: 2
  log_std_bounds: [-5, 2]
  # Sizes copied from the `agent` section through interpolation, so they stay
  # in sync with whatever values the agent is given.
  obs_dim: ${agent.obs_dim}
  goal_dim: ${agent.goal_dim}
  env_id_dim: ${agent.env_id_dim}
  action_dim: ${agent.action_dim}

# Environment
# `env` is also interpolated into the Hydra run directory name (`hydra.run.dir`).
env: reacher_easy
# NOTE(review): how `single_task` and `goal_mode` interact is decided by the
# consuming code, which is not visible from this file — confirm there.
single_task: reach-v2
goal_mode: multi_goal

# Representation
representation:
  # Settings not tied to a single model.
  latent_size: 14
  p_expert: 0.85
  reg_lambda: 0.0

  # `w` model. Model types listed by the original author: vector, mlp, distr
  # (presumably valid for both `w_model` and `phi_model` — confirm in the code).
  learn_w: true
  w_model: mlp
  w_hidden_dim: 64
  w_norm: true

  # `phi` model.
  learn_phi: true
  phi_model: mlp
  phi_hidden_dim: 1024

# `experiment` is interpolated into the Hydra run directory name.
experiment: vanilla
# Mandatory value with no default here; it is listed under the Hydra
# `exclude_keys`, so overriding it does not change the run directory name.
expert_model_date: ???

# Written with an explicit decimal point and exponent sign so that every YAML
# loader reads a float (a bare `1e6` is a string for YAML 1.1 loaders). The
# value is a float, so code that needs an integer count must cast it.
num_train_steps: 1.0e+6
replay_buffer_capacity: ${num_train_steps}  # same value as `num_train_steps`

num_seed_steps: 500

# NOTE(review): the unit of the frequency values in this file (steps vs.
# episodes) is defined by the training code — confirm there.
eval_frequency: 30
num_eval_episodes: 10

# Referenced by `agent.device` through `${device}`.
device: cuda

# logger
log_frequency: 10000
log_save_tb: true
ckpt_frequency: 50

reload_weights: true

# video recorder
save_video: true

# Appended to the Hydra run directory as `seed=<value>`.
seed: 1

# hydra configuration
hydra:
  run:
    # One output directory per run: date, environment, experiment name, the
    # command-line overrides, and finally the seed.
    dir: ./runs/${now:%Y.%m.%d}/${env}_${experiment}_${hydra.job.override_dirname}/seed=${seed}
  job:
    config:
      override_dirname:
        # Overrides of these keys are left out of `hydra.job.override_dirname`.
        # `seed` already appears in the path as its own `seed=...` component.
        exclude_keys:
          - seed
          - expert_model_date
